// Source: Pulse/internal/ai/eval/scenario_test.go (166 lines, 4.6 KiB, Go)

package eval
import (
"fmt"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
// TestRunner_RunScenario_Success runs a two-step scenario against a stub SSE
// server and checks that the scenario passes, that each step's content is
// captured, and that the second step reports the session ID sent by the server.
func TestRunner_RunScenario_Success(t *testing.T) {
	// The stub answers by request order: the first request gets the step 1
	// stream, the second gets the step 2 stream, anything further gets a 500.
	calls := 0
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		calls++
		w.Header().Set("Content-Type", "text/event-stream")
		switch calls {
		case 1:
			// Step 1 response.
			fmt.Fprint(w, "data: {\"type\":\"content\",\"data\":{\"text\":\"Step 1 done\"}}\n\n")
			fmt.Fprint(w, "data: {\"type\":\"done\",\"data\":{\"session_id\":\"session-1\"}}\n\n")
		case 2:
			// Step 2 response (same session). The request itself is not
			// inspected; a simple counter is enough for a flow test.
			fmt.Fprint(w, "data: {\"type\":\"content\",\"data\":{\"text\":\"Step 2 done\"}}\n\n")
			fmt.Fprint(w, "data: {\"type\":\"done\",\"data\":{\"session_id\":\"session-1\"}}\n\n")
		default:
			// A third request is unexpected for this scenario.
			w.WriteHeader(http.StatusInternalServerError)
		}
	}))
	defer server.Close()

	runner := NewRunner(DefaultConfig())
	runner.config.BaseURL = server.URL
	runner.config.Verbose = false
	runner.config.Preflight = false // Disable preflight for this specific test

	scenario := Scenario{
		Name: "Multi-Step Success",
		Steps: []Step{
			{
				Name:   "Step 1",
				Prompt: "First",
				Assertions: []Assertion{
					AssertContentContains("Step 1"),
				},
			},
			{
				Name:   "Step 2",
				Prompt: "Second",
				Assertions: []Assertion{
					AssertContentContains("Step 2"),
				},
			},
		},
	}

	result := runner.RunScenario(scenario)

	assert.True(t, result.Passed)
	// assert.Len does not abort the test, so bail out explicitly on a wrong
	// step count; indexing below would otherwise panic and bury the failure.
	if !assert.Len(t, result.Steps, 2) {
		return
	}
	assert.Equal(t, "Step 1 done", result.Steps[0].Content)
	assert.Equal(t, "Step 2 done", result.Steps[1].Content)
	assert.Equal(t, "session-1", result.Steps[1].SessionID) // Should propagate
}
// TestRunner_RunScenario_AssertionFailure checks that a failing assertion on
// the first step fails the scenario and that the second step is not recorded.
func TestRunner_RunScenario_AssertionFailure(t *testing.T) {
	// Every request gets the same successful stream; the failure comes from
	// the step's own assertion, not from the server.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, "data: {\"type\":\"content\",\"data\":{\"text\":\"Step done\"}}\n\n")
		fmt.Fprint(w, "data: {\"type\":\"done\",\"data\":{\"session_id\":\"session-1\"}}\n\n")
	}))
	defer server.Close()

	runner := NewRunner(DefaultConfig())
	runner.config.BaseURL = server.URL
	// Pin preflight off: with preflight enabled an extra "Preflight" step is
	// recorded, so the step count asserted below must not depend on the
	// DefaultConfig value.
	runner.config.Preflight = false

	scenario := Scenario{
		Name: "Assertion Failure",
		Steps: []Step{
			{
				Name:   "Step 1",
				Prompt: "Run",
				Assertions: []Assertion{
					// Assertion that fails unconditionally.
					func(result *StepResult) AssertionResult {
						return AssertionResult{Name: "FailAlways", Passed: false, Message: "Boom"}
					},
				},
			},
			{
				Name:   "Step 2",
				Prompt: "Should not run",
			},
		},
	}

	result := runner.RunScenario(scenario)

	assert.False(t, result.Passed)
	// Should stop after the first step. Bail out on a wrong count so the
	// index below cannot panic and hide the assertion failure.
	if !assert.Len(t, result.Steps, 1) {
		return
	}
	assert.False(t, result.Steps[0].Success)
}
// TestRunner_RunScenario_WithPreflight checks that, with preflight enabled,
// the result records a "Preflight" step ahead of the scenario's own step.
func TestRunner_RunScenario_WithPreflight(t *testing.T) {
	// Preflight and Step 1 both receive the same simple response.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, "data: {\"type\":\"content\",\"data\":{\"text\":\"Hello\"}}\n\n")
		fmt.Fprint(w, "data: {\"type\":\"done\",\"data\":{\"session_id\":\"session-1\"}}\n\n")
	}))
	defer server.Close()

	runner := NewRunner(DefaultConfig())
	runner.config.BaseURL = server.URL
	runner.config.Preflight = true

	scenario := Scenario{
		Name: "Preflight Scenario",
		Steps: []Step{
			{Name: "Step 1", Prompt: "Hi"},
		},
	}

	result := runner.RunScenario(scenario)

	assert.True(t, result.Passed)
	// Steps should include Preflight + Step 1 = 2 steps. Bail out on a wrong
	// count so the indexing below cannot panic and hide the failure.
	if !assert.Len(t, result.Steps, 2) {
		return
	}
	assert.Equal(t, "Preflight", result.Steps[0].StepName)
	assert.Equal(t, "Step 1", result.Steps[1].StepName)
}
// TestRunner_RunScenario_PreflightFail checks that when the server answers
// with an error status, the scenario fails and only the unsuccessful
// "Preflight" step is recorded.
func TestRunner_RunScenario_PreflightFail(t *testing.T) {
	// Every request is answered with HTTP 500.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
	}))
	defer server.Close()

	runner := NewRunner(DefaultConfig())
	runner.config.BaseURL = server.URL
	runner.config.Preflight = true
	runner.config.StepRetries = 0 // Fail fast

	scenario := Scenario{
		Name: "Preflight Fail",
		Steps: []Step{
			{Name: "Step 1", Prompt: "Hi"},
		},
	}

	result := runner.RunScenario(scenario)

	assert.False(t, result.Passed)
	// Only the preflight step should be captured. Bail out on a wrong count
	// so the index below cannot panic and hide the assertion failure.
	if !assert.Len(t, result.Steps, 1) {
		return
	}
	assert.Equal(t, "Preflight", result.Steps[0].StepName)
	assert.False(t, result.Steps[0].Success)
}