Pulse/internal/ai/eval/patrol.go
rcourtman 9b0fb527f5 feat(patrol): implement patrol findings, evaluation, and investigation logic
- Add core Patrol system for automated investigations
- Implement findings management and deduplication logic
- Add evaluation framework (patrol_eval) with quality assertions and scenarios
- Add patrol-specific tools and executor integration
- Add E2E test matrix script
2026-01-31 16:23:08 +00:00

731 lines
20 KiB
Go

package eval
import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"sync"
"time"
)
// PatrolScenario defines a patrol eval scenario.
//
// A scenario is executed by (*Runner).RunPatrolScenario, which runs Setup,
// triggers a patrol run, collects tool calls and findings, and then evaluates
// every entry in Assertions against the collected result.
type PatrolScenario struct {
// Name identifies the scenario; it is copied into PatrolRunResult.ScenarioName.
Name string
// Description is free-form explanatory text for the scenario.
Description string
// Setup runs before the patrol is triggered; a non-nil error aborts the
// scenario with a "setup failed" error.
Setup func(r *Runner) error // optional pre-run setup
// Teardown is deferred once Setup has succeeded; its error is only printed
// as a warning and does not change the result.
Teardown func(r *Runner) error // optional post-run cleanup
// Assertions are evaluated in order after the run; any assertion that does
// not pass marks the result as unsuccessful.
Assertions []PatrolAssertion
// Timeout bounds the whole scenario run; zero selects the default.
Timeout time.Duration // default 5m
}
// PatrolRunResult captures complete patrol execution trace.
//
// It is produced by (*Runner).RunPatrolScenario and consumed by assertions,
// the quality evaluator and PrintPatrolSummary.
type PatrolRunResult struct {
// ScenarioName is copied from PatrolScenario.Name.
ScenarioName string
// Success reports whether the scenario passed; a failed assertion always
// clears it.
Success bool
// Error holds the error recorded for the run, if any.
Error error
// Duration is the wall-clock time measured from the start of
// RunPatrolScenario until it returns.
Duration time.Duration
// ToolCalls are the tool invocations captured from the SSE stream. The
// stream is best-effort, so this may be empty even for a successful run.
ToolCalls []ToolCallEvent
// Findings are the findings fetched from the REST API, merged with findings
// found in patrol_get_findings tool output.
Findings []PatrolFinding
// Content is the concatenation of all "content" events seen on the stream.
Content string
// RawEvents are all SSE events that could be parsed from the stream.
RawEvents []PatrolSSEEvent
// Assertions holds one result per scenario assertion, in evaluation order.
Assertions []AssertionResult
Completed bool // true if patrol reported completion via status API
// Quality is the report returned by EvaluatePatrolQuality for this result.
Quality *PatrolQualityReport
}
// PatrolFinding mirrors the Finding JSON from the API.
//
// Values are decoded from GET /api/ai/patrol/findings; findings reported in
// the output of patrol_get_findings tool calls are merged in as well.
type PatrolFinding struct {
// ID identifies the finding; it is the de-duplication key when API findings
// are merged with findings taken from tool-call output.
ID string `json:"id"`
// Key, Severity and Title are the fields printed by PrintPatrolSummary.
Key string `json:"key"`
Severity string `json:"severity"`
Category string `json:"category"`
// ResourceID, ResourceName and ResourceType describe the resource the
// finding refers to (exact value formats are defined by the API, not here).
ResourceID string `json:"resource_id"`
ResourceName string `json:"resource_name"`
ResourceType string `json:"resource_type"`
Title string `json:"title"`
Description string `json:"description"`
Recommendation string `json:"recommendation"`
Evidence string `json:"evidence"`
}
// PatrolAssertion checks a PatrolRunResult.
//
// RunPatrolScenario calls each assertion after tool calls, findings and the
// quality report have been collected (Duration is not yet set at that point)
// and marks the scenario as failed when the returned AssertionResult has
// Passed == false.
type PatrolAssertion func(result *PatrolRunResult) AssertionResult
// PatrolSSEEvent represents a raw SSE event from the patrol stream.
//
// Events are flat JSON objects carried in "data: ..." lines. Only the fields
// relevant to the event's Type are populated.
type PatrolSSEEvent struct {
// Type selects how the event is handled. The stream parser in this file
// recognises "start", "phase", "content", "tool_start", "tool_end",
// "complete" and "error".
Type string `json:"type"`
// Content is the text chunk of a "content" event or the message of an
// "error" event.
Content string `json:"content,omitempty"`
// Phase is the phase name reported by a "phase" event.
Phase string `json:"phase,omitempty"`
Tokens int `json:"tokens,omitempty"`
// ToolID correlates a "tool_start" event with its "tool_end" event.
ToolID string `json:"tool_id,omitempty"`
ToolName string `json:"tool_name,omitempty"`
// ToolInput and ToolRawInput both carry the tool input; the parser prefers
// ToolRawInput whenever it is non-empty.
ToolInput string `json:"tool_input,omitempty"`
ToolRawInput string `json:"tool_raw_input,omitempty"`
ToolOutput string `json:"tool_output,omitempty"`
// ToolSuccess is a pointer so an absent field (nil) can be told apart from
// an explicit false; the parser records a tool call as successful only when
// it is non-nil and true.
ToolSuccess *bool `json:"tool_success,omitempty"`
}
// RunPatrolScenario executes a patrol scenario and returns the results.
//
// Strategy: trigger the patrol run, then use a dual approach:
//  1. Poll GET /api/ai/patrol/status until Running=false (primary completion signal)
//  2. Attempt to connect to the SSE stream in a goroutine to capture tool events
//
// The SSE stream may or may not connect depending on timing (the server only
// sends HTTP headers once it has data). We treat the stream as best-effort
// for tool-level visibility, and rely on polling for completion.
//
// The returned result always has Duration set. Success is false when any step
// fails (model override, setup, idle wait, trigger, completion polling,
// fetching findings) or when at least one assertion does not pass; the first
// error encountered is stored in Error. Teardown (if any) runs on return once
// Setup has succeeded, and its error is only logged.
func (r *Runner) RunPatrolScenario(scenario PatrolScenario) PatrolRunResult {
	startTime := time.Now()
	result := PatrolRunResult{
		ScenarioName: scenario.Name,
		Success:      true,
	}

	// abort finalises the result for a failure that happens before the patrol
	// run has been started.
	abort := func(err error) PatrolRunResult {
		result.Error = err
		result.Success = false
		result.Duration = time.Since(startTime)
		return result
	}

	timeout := scenario.Timeout
	if timeout == 0 {
		timeout = 5 * time.Minute
	}
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Optional patrol model override (e.g., to force a cheaper model for evals)
	restoreModel, overrideErr := r.applyPatrolModelOverride(ctx)
	if overrideErr != nil {
		return abort(fmt.Errorf("patrol model override failed: %w", overrideErr))
	}
	if restoreModel != nil {
		defer restoreModel()
	}

	// Run optional setup
	if scenario.Setup != nil {
		if err := scenario.Setup(r); err != nil {
			return abort(fmt.Errorf("setup failed: %w", err))
		}
	}

	// Run optional teardown on exit
	if scenario.Teardown != nil {
		defer func() {
			if err := scenario.Teardown(r); err != nil {
				fmt.Printf(" [WARN] Teardown error: %v\n", err)
			}
		}()
	}

	// Wait for patrol to be idle before starting
	if err := r.waitForPatrolIdle(ctx); err != nil {
		return abort(fmt.Errorf("waiting for patrol idle: %w", err))
	}

	// Trigger patrol run
	if err := r.triggerPatrolRun(); err != nil {
		return abort(fmt.Errorf("triggering patrol run: %w", err))
	}
	if r.config.Verbose {
		fmt.Printf(" Patrol triggered\n")
	}

	// Start SSE stream reader in background goroutine.
	// This captures tool events if the stream connects. It's best-effort.
	var streamMu sync.Mutex
	var streamEvents []PatrolSSEEvent
	var streamToolCalls []ToolCallEvent
	var streamContent strings.Builder
	var streamConnected bool
	streamCtx, streamCancel := context.WithCancel(ctx)
	defer streamCancel()
	streamDone := make(chan struct{})
	go func() {
		defer close(streamDone)
		body, err := r.connectPatrolStream(streamCtx)
		if err != nil {
			// Stream didn't connect — that's OK, we still poll for completion
			if r.config.Verbose && streamCtx.Err() == nil {
				fmt.Printf(" [SSE] Could not connect to stream: %v\n", err)
			}
			return
		}
		defer body.Close()

		streamMu.Lock()
		streamConnected = true
		streamMu.Unlock()
		if r.config.Verbose {
			fmt.Printf(" [SSE] Connected to patrol stream\n")
		}

		// The parse error is deliberately ignored: the stream is best-effort
		// and completion is decided by status polling below.
		events, toolCalls, content, _ := r.parsePatrolSSEStream(streamCtx, body)
		streamMu.Lock()
		streamEvents = events
		streamToolCalls = toolCalls
		streamContent.WriteString(content)
		streamMu.Unlock()
	}()

	// Poll for completion (primary mechanism)
	completed, pollErr := r.waitForPatrolComplete(ctx)
	result.Completed = completed

	// Cancel the stream goroutine and wait for it to finish
	streamCancel()
	<-streamDone

	if pollErr != nil {
		result.Error = pollErr
		result.Success = false
	}

	// Collect stream results
	streamMu.Lock()
	result.RawEvents = streamEvents
	result.ToolCalls = streamToolCalls
	result.Content = streamContent.String()
	if streamConnected && r.config.Verbose {
		fmt.Printf(" [SSE] Captured %d events, %d tool calls\n", len(streamEvents), len(streamToolCalls))
	} else if !streamConnected && r.config.Verbose {
		fmt.Printf(" [SSE] Stream did not connect (tool events not captured)\n")
	}
	streamMu.Unlock()

	// Fetch findings from REST API
	findings, findErr := r.fetchPatrolFindings()
	if findErr != nil {
		// Without the API findings the assertions below run against incomplete
		// data, so the scenario must not be reported as passed.
		result.Success = false
		if result.Error == nil {
			result.Error = fmt.Errorf("fetching findings: %w", findErr)
		}
	}
	result.Findings = mergeFindingsFromToolCalls(findings, result.ToolCalls)
	if r.config.Verbose && len(findings) > 0 {
		fmt.Printf(" Fetched %d findings from API\n", len(findings))
	}

	// Compute quality metrics (best-effort)
	result.Quality = EvaluatePatrolQuality(&result)

	// Run assertions
	for _, assertion := range scenario.Assertions {
		assertResult := assertion(&result)
		result.Assertions = append(result.Assertions, assertResult)
		if !assertResult.Passed {
			result.Success = false
		}
	}

	result.Duration = time.Since(startTime)
	return result
}
// mergeFindingsFromToolCalls returns the API findings extended with findings
// that only appear in the output of patrol_get_findings tool calls.
//
// findings is the list fetched from the REST API; it is never modified and all
// of its entries (including ones without an ID) are kept, in their original
// order. toolCalls are scanned in order; for every patrol_get_findings call
// whose output is a JSON object with a "findings" array, each entry that has a
// non-empty ID not seen before is appended. API findings win over tool-call
// findings with the same ID. Tool output that is empty or not valid JSON is
// skipped.
//
// When nothing is added the original findings slice is returned as is.
func mergeFindingsFromToolCalls(findings []PatrolFinding, toolCalls []ToolCallEvent) []PatrolFinding {
	if len(toolCalls) == 0 {
		return findings
	}

	seen := make(map[string]struct{}, len(findings))
	for _, f := range findings {
		if f.ID != "" {
			seen[f.ID] = struct{}{}
		}
	}

	// extra collects tool-only findings in first-seen order so the result is
	// deterministic (iterating a map here would randomise the order).
	var extra []PatrolFinding
	for _, tc := range toolCalls {
		if tc.Name != "patrol_get_findings" || strings.TrimSpace(tc.Output) == "" {
			continue
		}
		var payload struct {
			Findings []PatrolFinding `json:"findings"`
		}
		if err := json.Unmarshal([]byte(tc.Output), &payload); err != nil {
			// Tool output is not guaranteed to be JSON; ignore what we cannot parse.
			continue
		}
		for _, f := range payload.Findings {
			if f.ID == "" {
				continue
			}
			if _, dup := seen[f.ID]; dup {
				continue
			}
			seen[f.ID] = struct{}{}
			extra = append(extra, f)
		}
	}

	if len(extra) == 0 {
		return findings
	}
	merged := make([]PatrolFinding, 0, len(findings)+len(extra))
	merged = append(merged, findings...)
	return append(merged, extra...)
}
// waitForPatrolIdle polls GET /api/ai/patrol/status until Running=false.
//
// It returns nil as soon as the status endpoint reports that no patrol is
// running, polling every 3 seconds while one is. It returns an error when ctx
// is done, when the request fails, when the endpoint answers with a non-200
// status, or when the response body cannot be decoded. A failed status check
// is reported as an error rather than being mistaken for "idle".
func (r *Runner) waitForPatrolIdle(ctx context.Context) error {
	statusURL := r.config.BaseURL + "/api/ai/patrol/status"
	for {
		select {
		case <-ctx.Done():
			return fmt.Errorf("timeout waiting for patrol idle")
		default:
		}

		req, err := http.NewRequestWithContext(ctx, "GET", statusURL, nil)
		if err != nil {
			return err
		}
		req.SetBasicAuth(r.config.Username, r.config.Password)
		resp, err := r.client.Do(req)
		if err != nil {
			return err
		}
		if resp.StatusCode != http.StatusOK {
			// The body is only used to enrich the message, so a read error is ignored.
			body, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			return fmt.Errorf("patrol status returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
		}
		var status struct {
			Running bool `json:"running"`
		}
		decodeErr := json.NewDecoder(resp.Body).Decode(&status)
		// Close explicitly instead of deferring: this runs inside a loop.
		resp.Body.Close()
		if decodeErr != nil {
			return fmt.Errorf("decoding patrol status: %w", decodeErr)
		}
		if !status.Running {
			return nil
		}
		if r.config.Verbose {
			fmt.Printf(" Patrol is running, waiting...\n")
		}

		select {
		case <-ctx.Done():
			return fmt.Errorf("timeout waiting for patrol idle")
		case <-time.After(3 * time.Second):
		}
	}
}
// waitForPatrolComplete polls the status endpoint until the patrol run that
// was just triggered has finished.
//
// It works in two phases:
//  1. Start phase: poll once per second, at most 10 times, waiting to observe
//     Running=true. If the patrol is still not running on the fourth or a later
//     successful poll, the run is assumed to have finished before it could be
//     observed and (true, nil) is returned.
//  2. Completion phase: poll every 3 seconds until Running=false.
//
// Returns (true, nil) when the patrol is considered complete. Returns
// (false, err) when ctx is done first, or when not a single status poll of the
// start phase succeeded — in that case nothing is known about the patrol and
// reporting completion would be wrong. Poll errors in the completion phase are
// retried until ctx expires.
func (r *Runner) waitForPatrolComplete(ctx context.Context) (bool, error) {
	// First, wait briefly for patrol to actually start (Running=true)
	sawRunning := false
	gotStatus := false // at least one start-phase poll returned a status
	var lastErr error
	for i := 0; i < 10; i++ {
		select {
		case <-ctx.Done():
			return false, fmt.Errorf("timeout waiting for patrol to start")
		case <-time.After(1 * time.Second):
		}
		running, healthy, err := r.getPatrolStatus(ctx)
		if err != nil {
			lastErr = err
			continue
		}
		gotStatus = true
		if running {
			sawRunning = true
			if r.config.Verbose {
				fmt.Printf(" Patrol is running...\n")
			}
			break
		}
		// If not running and we see a recent completion, maybe it finished instantly
		if i > 2 {
			if r.config.Verbose {
				fmt.Printf(" Patrol not running (may have completed quickly), healthy=%v\n", healthy)
			}
			return true, nil
		}
	}

	if !sawRunning {
		if !gotStatus {
			// Every poll failed: the status endpoint is unreachable, so we
			// cannot claim the patrol completed.
			return false, fmt.Errorf("patrol status unavailable while waiting for patrol to start: %w", lastErr)
		}
		// May have completed extremely fast, check findings to verify it ran
		if r.config.Verbose {
			fmt.Printf(" Never saw patrol running state (may have completed instantly)\n")
		}
		return true, nil
	}

	// Now poll until Running=false (patrol completed)
	for {
		select {
		case <-ctx.Done():
			return false, fmt.Errorf("timeout waiting for patrol to complete")
		case <-time.After(3 * time.Second):
		}
		running, healthy, err := r.getPatrolStatus(ctx)
		if err != nil {
			// Transient poll failures are retried; ctx bounds the total wait.
			if r.config.Verbose {
				fmt.Printf(" Status poll error: %v\n", err)
			}
			continue
		}
		if !running {
			if r.config.Verbose {
				fmt.Printf(" Patrol completed (healthy=%v)\n", healthy)
			}
			return true, nil
		}
		if r.config.Verbose {
			fmt.Printf(" Still running...\n")
		}
	}
}
// getPatrolStatus returns (running, healthy, error) from the status endpoint.
//
// It issues GET /api/ai/patrol/status with basic auth. A transport failure, a
// non-200 response, or an undecodable body is returned as an error; without
// the status check an error payload would decode to running=false and be
// mistaken for a finished patrol. On error both booleans are false.
func (r *Runner) getPatrolStatus(ctx context.Context) (bool, bool, error) {
	req, err := http.NewRequestWithContext(ctx, "GET", r.config.BaseURL+"/api/ai/patrol/status", nil)
	if err != nil {
		return false, false, err
	}
	req.SetBasicAuth(r.config.Username, r.config.Password)

	resp, err := r.client.Do(req)
	if err != nil {
		return false, false, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// The body is only used to enrich the message, so a read error is ignored.
		body, _ := io.ReadAll(resp.Body)
		return false, false, fmt.Errorf("patrol status returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
	}

	var status struct {
		Running bool `json:"running"`
		Healthy bool `json:"healthy"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
		return false, false, err
	}
	return status.Running, status.Healthy, nil
}
// triggerPatrolRun starts a patrol run by sending an authenticated
// POST /api/ai/patrol/run. Any response other than 200 OK is turned into an
// error that carries the status code and the trimmed response body.
func (r *Runner) triggerPatrolRun() error {
	req, err := http.NewRequest(http.MethodPost, r.config.BaseURL+"/api/ai/patrol/run", nil)
	if err != nil {
		return err
	}
	req.SetBasicAuth(r.config.Username, r.config.Password)

	resp, err := r.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		return nil
	}
	// Body read errors are ignored; the body only decorates the message.
	raw, _ := io.ReadAll(resp.Body)
	detail := strings.TrimSpace(string(raw))
	return fmt.Errorf("patrol run returned status %d: %s", resp.StatusCode, detail)
}
// fetchPatrolFindings retrieves the current findings with an authenticated
// GET /api/ai/patrol/findings and decodes the JSON array in the response.
// A non-200 response or an undecodable body yields a nil slice and an error.
func (r *Runner) fetchPatrolFindings() ([]PatrolFinding, error) {
	endpoint := r.config.BaseURL + "/api/ai/patrol/findings"
	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	req.SetBasicAuth(r.config.Username, r.config.Password)

	resp, err := r.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		// Body read errors are ignored; the body only decorates the message.
		raw, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("findings returned status %d: %s", code, strings.TrimSpace(string(raw)))
	}

	var decoded []PatrolFinding
	decodeErr := json.NewDecoder(resp.Body).Decode(&decoded)
	if decodeErr != nil {
		return nil, fmt.Errorf("decoding findings: %w", decodeErr)
	}
	return decoded, nil
}
// connectPatrolStream opens the patrol SSE stream (GET /api/ai/patrol/stream)
// and hands back the still-open response body. The caller owns the body and
// must close it. A non-200 response is drained into the error message and
// closed here. The request is bound to ctx, so cancelling ctx ends the stream.
func (r *Runner) connectPatrolStream(ctx context.Context) (io.ReadCloser, error) {
	endpoint := r.config.BaseURL + "/api/ai/patrol/stream"
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	req.SetBasicAuth(r.config.Username, r.config.Password)
	req.Header.Set("Accept", "text/event-stream")

	// A dedicated client with no overall timeout: the stream is long-lived and
	// its lifetime is governed by ctx instead.
	noTimeoutClient := new(http.Client)
	resp, err := noTimeoutClient.Do(req)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode == http.StatusOK {
		return resp.Body, nil
	}

	// Body read errors are ignored; the body only decorates the message.
	raw, _ := io.ReadAll(resp.Body)
	resp.Body.Close()
	return nil, fmt.Errorf("stream returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(raw)))
}
// parsePatrolSSEStream reads patrol SSE events and extracts tool calls + content.
// Patrol SSE events are flat JSON objects (not nested like chat SSE).
//
// Only "data:" lines are considered (the space after the colon is optional);
// lines whose payload is empty or not valid JSON are skipped. Reading stops at
// a "complete" event, at an "error" event (returned as an error), at end of
// input, or on a scanner error. A single event line may be up to 8 MiB.
//
// It returns every parsed event, the tool calls in the order their "tool_end"
// events arrived, and the concatenated text of all "content" events. A
// tool_end without a matching tool_start is still recorded; a tool_start that
// never sees a tool_end is not.
//
// When ctx is done first, whatever has been parsed so far is returned with a
// nil error. body is a plain io.Reader and cannot be interrupted from here, so
// the reader goroutine keeps running until the caller closes body; the
// returned slices are copies taken under a lock and are not affected by it.
func (r *Runner) parsePatrolSSEStream(ctx context.Context, body io.Reader) ([]PatrolSSEEvent, []ToolCallEvent, string, error) {
	var (
		mu             sync.Mutex // guards the four variables below
		events         []PatrolSSEEvent
		toolCalls      []ToolCallEvent
		contentBuilder strings.Builder
		scanErr        error
	)

	// Track tool calls in progress (by ID). Only the reader goroutine uses it.
	toolCallsInProgress := make(map[string]*ToolCallEvent)

	scanner := bufio.NewScanner(body)
	const maxSSEEventSize = 8 * 1024 * 1024
	scanner.Buffer(make([]byte, 0, 64*1024), maxSSEEventSize)

	done := make(chan struct{})
	go func() {
		defer close(done)
		for scanner.Scan() {
			line := scanner.Text()
			if !strings.HasPrefix(line, "data:") {
				continue
			}
			// Per the SSE format a single space after the colon is optional.
			data := strings.TrimPrefix(strings.TrimPrefix(line, "data:"), " ")
			if data == "" {
				continue
			}
			var event PatrolSSEEvent
			if err := json.Unmarshal([]byte(data), &event); err != nil {
				// Not a patrol event; ignore and keep reading.
				continue
			}

			if r.config.Verbose {
				switch event.Type {
				case "start":
					fmt.Printf(" [SSE] Patrol started\n")
				case "phase":
					fmt.Printf(" [SSE] Phase: %s\n", event.Phase)
				case "tool_start":
					fmt.Printf(" [SSE] Tool start: %s\n", event.ToolName)
				case "tool_end":
					status := "OK"
					if event.ToolSuccess != nil && !*event.ToolSuccess {
						status = "FAILED"
					}
					fmt.Printf(" [SSE] Tool end: %s [%s]\n", event.ToolName, status)
				case "complete":
					fmt.Printf(" [SSE] Patrol complete\n")
				case "error":
					fmt.Printf(" [SSE] Error: %s\n", event.Content)
				}
			}

			// The raw input, when present, takes precedence over the parsed one.
			input := event.ToolInput
			if event.ToolRawInput != "" {
				input = event.ToolRawInput
			}

			finished := false
			mu.Lock()
			events = append(events, event)
			switch event.Type {
			case "content":
				contentBuilder.WriteString(event.Content)
			case "tool_start":
				toolCallsInProgress[event.ToolID] = &ToolCallEvent{
					ID:    event.ToolID,
					Name:  event.ToolName,
					Input: input,
				}
			case "tool_end":
				success := event.ToolSuccess != nil && *event.ToolSuccess
				if tc, ok := toolCallsInProgress[event.ToolID]; ok {
					if tc.Input == "" && input != "" {
						tc.Input = input
					}
					tc.Output = event.ToolOutput
					tc.Success = success
					toolCalls = append(toolCalls, *tc)
					delete(toolCallsInProgress, event.ToolID)
				} else {
					// tool_end without matching tool_start
					toolCalls = append(toolCalls, ToolCallEvent{
						ID:      event.ToolID,
						Name:    event.ToolName,
						Input:   input,
						Output:  event.ToolOutput,
						Success: success,
					})
				}
			case "complete":
				finished = true
			case "error":
				scanErr = fmt.Errorf("patrol error: %s", event.Content)
				finished = true
			}
			mu.Unlock()
			if finished {
				return
			}
		}
		if err := scanner.Err(); err != nil {
			mu.Lock()
			scanErr = err
			mu.Unlock()
		}
	}()

	select {
	case <-ctx.Done():
		// The reader may still be appending; hand out copies taken under the
		// lock. Context cancel is expected, so no error is reported.
		mu.Lock()
		defer mu.Unlock()
		return append([]PatrolSSEEvent(nil), events...),
			append([]ToolCallEvent(nil), toolCalls...),
			contentBuilder.String(),
			nil
	case <-done:
		// The reader goroutine has exited, so its state is safe to read directly.
		return events, toolCalls, contentBuilder.String(), scanErr
	}
}
// PrintPatrolSummary writes a human-readable report of one patrol scenario
// run to stdout: header, duration, completion flag and error, followed by the
// captured tool calls, findings, quality metrics, assertion results and the
// final PASSED/FAILED verdict. With verbose mode enabled it additionally shows
// the output of successful tool calls, the individual quality signals and the
// first 500 characters of streamed content.
func (r *Runner) PrintPatrolSummary(result PatrolRunResult) {
	verbose := r.config.Verbose

	// label maps a boolean to one of two status strings.
	label := func(ok bool, yes, no string) string {
		if ok {
			return yes
		}
		return no
	}

	fmt.Printf("\n")
	fmt.Printf("========================================\n")
	fmt.Printf("PATROL SCENARIO: %s\n", result.ScenarioName)
	fmt.Printf("========================================\n")
	fmt.Printf("Duration: %v\n", result.Duration)
	fmt.Printf("Completed: %v\n", result.Completed)
	if result.Error != nil {
		fmt.Printf("ERROR: %v\n", result.Error)
	}

	if count := len(result.ToolCalls); count > 0 {
		fmt.Printf("\nTool Calls (%d):\n", count)
		for _, call := range result.ToolCalls {
			fmt.Printf(" - %s [%s]: %s\n", call.Name, label(call.Success, "OK", "FAILED"), truncate(call.Input, 80))
			// Output is always shown for failures, and for everything in verbose mode.
			if verbose || !call.Success {
				fmt.Printf(" Output: %s\n", truncate(call.Output, 200))
			}
		}
	}

	if count := len(result.Findings); count > 0 {
		fmt.Printf("\nFindings (%d):\n", count)
		for _, finding := range result.Findings {
			fmt.Printf(" - [%s] %s: %s\n", finding.Severity, finding.Key, finding.Title)
		}
	}

	if quality := result.Quality; quality != nil {
		fmt.Printf("\nQuality:\n")
		switch {
		case !quality.CoverageKnown:
			fmt.Printf(" Signal coverage: unknown (no tool calls captured)\n")
		case quality.SignalsTotal > 0:
			fmt.Printf(" Signal coverage: %d/%d (%.0f%%)\n", quality.SignalsMatched, quality.SignalsTotal, quality.SignalCoverage*100)
		default:
			fmt.Printf(" Signal coverage: no signals detected\n")
		}
		if verbose && len(quality.Signals) > 0 {
			fmt.Printf(" Signals:\n")
			for _, signal := range quality.Signals {
				fmt.Printf(" - [%s] %s on %s (%s)\n", label(signal.Matched, "MATCH", "MISS"), signal.SignalType, signal.ResourceID, signal.Category)
			}
		}
	}

	if verbose && result.Content != "" {
		fmt.Printf("\nContent:\n%s\n", truncate(result.Content, 500))
	}

	haveAssertions := len(result.Assertions) > 0
	if haveAssertions {
		fmt.Printf("\nAssertions:\n")
		for _, outcome := range result.Assertions {
			fmt.Printf(" [%s] %s: %s\n", label(outcome.Passed, "PASS", "FAIL"), outcome.Name, outcome.Message)
		}
	}

	if result.Success {
		fmt.Printf("\nResult: PASSED\n")
		fmt.Printf("========================================\n")
		return
	}

	fmt.Printf("\nResult: FAILED\n")
	if haveAssertions {
		// Repeat only the failing assertions so they are easy to spot.
		fmt.Printf("\nFailures:\n")
		for _, outcome := range result.Assertions {
			if outcome.Passed {
				continue
			}
			fmt.Printf(" Assertion '%s': %s\n", outcome.Name, outcome.Message)
		}
	}
	fmt.Printf("========================================\n")
}