// File: Pulse/internal/ai/eval/runner_additional_test.go
// Additional runner test coverage: SSE parsing, approval decisions, retry
// logic, env helpers, report writing, and preflight checks.

package eval
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestRunner_ParseSSEStream_Comprehensive tests the parsing of various SSE events and scenarios
// TestRunner_ParseSSEStream_Comprehensive exercises parseSSEStream across a range
// of SSE event sequences: plain content, tool calls, approvals, error events,
// raw-string errors, and malformed JSON lines.
func TestRunner_ParseSSEStream_Comprehensive(t *testing.T) {
	r := &Runner{config: DefaultConfig()}

	cases := []struct {
		name          string
		body          string
		wantContent   string
		wantTools     int
		wantApprovals int
		wantErr       bool
		errSubstr     string
	}{
		{
			name: "Standard flow with content and done",
			body: `data: {"type":"content","data":{"text":"Hello"}}
data: {"type":"content","data":{"text":" World"}}
data: {"type":"done","data":{"session_id":"sess-1","input_tokens":10,"output_tokens":5}}
`,
			wantContent: "Hello World",
		},
		{
			name: "Tool call flow",
			body: `data: {"type":"tool_start","data":{"id":"call-1","name":"test_tool","input":"{\"arg\":\"val\"}"}}
data: {"type":"tool_end","data":{"id":"call-1","name":"test_tool","output":"result","success":true}}
data: {"type":"done","data":{}}
`,
			wantTools: 1,
		},
		{
			name: "Interleaved content and tools",
			body: `data: {"type":"content","data":{"text":"Using tool..."}}
data: {"type":"tool_start","data":{"id":"call-1","name":"t1","input":""}}
data: {"type":"tool_end","data":{"id":"call-1","name":"t1","output":"ok","success":true}}
data: {"type":"content","data":{"text":" Done."}}
data: {"type":"done","data":{}}
`,
			wantContent: "Using tool... Done.",
			wantTools:   1,
		},
		{
			name: "Approval needed event",
			body: `data: {"type":"approval_needed","data":{"approval_id":"app-1","tool_id":"call-2","tool_name":"dangerous_tool","risk":"high"}}
data: {"type":"done","data":{}}
`,
			wantApprovals: 1,
		},
		{
			name: "Stream error event",
			body: `data: {"type":"error","data":{"message":"something went wrong"}}
`,
			wantErr:   true,
			errSubstr: "something went wrong",
		},
		{
			name: "Raw string error event",
			body: `data: {"type":"error","data":"raw error message"}
`,
			wantErr:   true,
			errSubstr: "raw error message",
		},
		{
			name: "Malformed JSON (should be ignored/handled gracefully)",
			body: `data: {invalid-json}
data: {"type":"content","data":{"text":"Still working"}}
data: {"type":"done","data":{}}
`,
			wantContent: "Still working",
		},
		{
			name: "Multiple tool calls",
			body: `data: {"type":"tool_start","data":{"id":"1","name":"t1","input":"i1"}}
data: {"type":"tool_start","data":{"id":"2","name":"t2","input":"i2"}}
data: {"type":"tool_end","data":{"id":"1","output":"o1","success":true}}
data: {"type":"tool_end","data":{"id":"2","output":"o2","success":false}}
data: {"type":"done","data":{}}
`,
			wantTools: 2,
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			_, tools, approvals, content, _, _, _, _, err := r.parseSSEStream(
				strings.NewReader(tt.body),
				ApprovalNone,
				"",
			)
			if tt.wantErr {
				require.Error(t, err)
				if tt.errSubstr != "" {
					assert.Contains(t, err.Error(), tt.errSubstr)
				}
				return
			}
			require.NoError(t, err)
			assert.Equal(t, tt.wantContent, content)
			assert.Len(t, tools, tt.wantTools)
			assert.Len(t, approvals, tt.wantApprovals)
		})
	}
}
// TestRunner_HandleApprovalDecision verifies approve/deny/ignore handling against
// a mock approvals API, including basic-auth enforcement, the deny reason
// payload, and propagation of server errors.
func TestRunner_HandleApprovalDecision(t *testing.T) {
	// Mock server emulating the approvals API endpoints.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The runner authenticates with basic auth; reject anything else.
		u, p, ok := r.BasicAuth()
		if !ok || u != "admin" || p != "admin" {
			w.WriteHeader(http.StatusUnauthorized)
			return
		}
		if strings.HasSuffix(r.URL.Path, "/approve") {
			assert.Equal(t, http.MethodPost, r.Method)
			w.WriteHeader(http.StatusOK)
			return
		}
		if strings.HasSuffix(r.URL.Path, "/deny") {
			assert.Equal(t, http.MethodPost, r.Method)
			// The deny request must carry the caller-supplied reason. The previous
			// handler returned 200 regardless, so a missing or wrong reason was
			// silently accepted; now the payload is actually asserted.
			// (assert, not require: FailNow must not be called off the test goroutine.)
			var payload map[string]string
			if assert.NoError(t, json.NewDecoder(r.Body).Decode(&payload)) {
				assert.Equal(t, "unsafe", payload["reason"])
			}
			w.WriteHeader(http.StatusOK)
			return
		}
		w.WriteHeader(http.StatusNotFound)
	}))
	defer server.Close()

	cfg := DefaultConfig()
	cfg.BaseURL = server.URL
	runner := NewRunner(cfg)

	// Approve succeeds against the mock endpoint.
	err := runner.handleApprovalDecision(ApprovalApprove, "app-123", "")
	require.NoError(t, err)
	// Deny succeeds; the handler above asserts the reason payload.
	err = runner.handleApprovalDecision(ApprovalDeny, "app-456", "unsafe")
	require.NoError(t, err)
	// None is a no-op and must not error.
	err = runner.handleApprovalDecision(ApprovalNone, "app-789", "")
	require.NoError(t, err)

	// A 5xx response must surface as an error that mentions the status code.
	failServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte("oops")) // best-effort body; error irrelevant in a mock
	}))
	defer failServer.Close()

	failRunner := NewRunner(DefaultConfig())
	failRunner.config.BaseURL = failServer.URL
	err = failRunner.handleApprovalDecision(ApprovalApprove, "app-fail", "")
	require.Error(t, err)
	assert.Contains(t, err.Error(), "status 500")
}
// TestRunner_RetryLogic_Advanced drives shouldRetryStep through the retry
// decision matrix: rate limits, stream failures, phantom responses, explicit
// tool requests, and retryable vs. non-retryable tool errors.
func TestRunner_RetryLogic_Advanced(t *testing.T) {
	r := &Runner{config: DefaultConfig()}
	// Turn every retry category on so each branch is reachable.
	r.config.RetryOnRateLimit = true
	r.config.RetryOnPhantom = true
	r.config.RetryOnToolErrors = true
	r.config.RetryOnExplicitTool = true

	cases := []struct {
		name       string
		result     *StepResult
		step       Step
		wantRetry  bool
		wantReason string
	}{
		{
			name:       "Rate Limit Error",
			result:     &StepResult{Error: fmt.Errorf("HTTP 429 Too Many Requests")},
			wantRetry:  true,
			wantReason: "rate_limit",
		},
		{
			name:       "Stream Parse Error",
			result:     &StepResult{Error: fmt.Errorf("failed to parse SSE stream: unexpected EOF")},
			wantRetry:  true,
			wantReason: "stream_error",
		},
		{
			name: "Phantom Detection (Phantom content, no successful tools)",
			result: &StepResult{
				Content:   "I apologize, but I wasn't able to access the infrastructure tools needed to complete that request",
				ToolCalls: []ToolCallEvent{{Success: false}},
			},
			wantRetry:  true,
			wantReason: "phantom_detection",
		},
		{
			name: "Phantom Detection (Phantom content, BUT has successful tools - No Retry)",
			result: &StepResult{
				Content:   "I apologize, but I wasn't able to access the infrastructure tools needed to complete that request",
				ToolCalls: []ToolCallEvent{{Success: true}},
			},
			wantRetry: false,
		},
		{
			name: "Explicit Tool Requested but Missing",
			step: Step{Prompt: "Please use pulse_read to check files"},
			result: &StepResult{
				Content:   "some content",
				ToolCalls: []ToolCallEvent{},
			},
			wantRetry:  true,
			wantReason: "no_tool_calls_for_explicit_tool",
		},
		{
			name: "Tool Error (Retryable: timeout)",
			result: &StepResult{
				Content:   "some content",
				ToolCalls: []ToolCallEvent{{Success: false, Output: "context deadline exceeded"}},
			},
			wantRetry:  true,
			wantReason: "tool_error",
		},
		{
			name: "Tool Error (Non-Retryable: routing mismatch)",
			result: &StepResult{
				Content:   "some content",
				ToolCalls: []ToolCallEvent{{Success: false, Output: "routing_mismatch detected"}},
			},
			wantRetry: false,
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			gotRetry, gotReason := r.shouldRetryStep(tt.result, tt.step)
			assert.Equal(t, tt.wantRetry, gotRetry)
			if tt.wantRetry {
				assert.Equal(t, tt.wantReason, gotReason)
			}
		})
	}
}
// TestIsRateLimitError checks classification of rate-limit-shaped error strings.
func TestIsRateLimitError(t *testing.T) {
	cases := map[string]bool{
		"429 Too Many Requests": true,
		"Quota exceeded":        true,
		"Retry-After: 30":       true,
		"Generic error":         false,
	}
	for msg, want := range cases {
		assert.Equal(t, want, isRateLimitError(msg), msg)
	}
}
func TestHasRetryableToolError(t *testing.T) {
// Retryable
assert.True(t, hasRetryableToolError([]ToolCallEvent{
{Success: false, Output: "connection refused"},
}))
assert.True(t, hasRetryableToolError([]ToolCallEvent{
{Success: false, Output: "502 Bad Gateway"},
}))
// Non-retryable has precedence
assert.False(t, hasRetryableToolError([]ToolCallEvent{
{Success: false, Output: "read_only_violation"},
}))
// Success means no retry needed for that tool
assert.False(t, hasRetryableToolError([]ToolCallEvent{
{Success: true, Output: "success"},
}))
}
// TestRunner_ExecuteStep_FullFlow runs a single step against a mock SSE server
// and checks content, session ID, and success are captured. Verbose mode is on
// to cover the logging paths.
func TestRunner_ExecuteStep_FullFlow(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, "data: {\"type\":\"content\",\"data\":{\"text\":\"Step done\"}}\n\n")
		fmt.Fprint(w, "data: {\"type\":\"done\",\"data\":{\"session_id\":\"s-1\"}}\n\n")
	}))
	defer server.Close()

	r := NewRunner(DefaultConfig())
	r.config.BaseURL = server.URL
	r.config.Verbose = true // exercise verbose logging paths

	got := r.executeStep(Step{Name: "Test Step", Prompt: "Run"}, "")

	assert.True(t, got.Success)
	assert.Equal(t, "Step done", got.Content)
	assert.Equal(t, "s-1", got.SessionID)
}
// TestRunner_ExecuteStep_RetryFlow forces a 429 on the first request and
// verifies executeStep retries, succeeds, and records the rate_limit retry.
func TestRunner_ExecuteStep_RetryFlow(t *testing.T) {
	var attempts int
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		attempts++
		switch attempts {
		case 1:
			// First attempt: rate limited.
			w.WriteHeader(http.StatusTooManyRequests)
			w.Write([]byte("Rate limit exceeded"))
		default:
			// Subsequent attempts succeed.
			w.Header().Set("Content-Type", "text/event-stream")
			fmt.Fprintf(w, "data: {\"type\":\"content\",\"data\":{\"text\":\"Success\"}}\n\n")
			fmt.Fprintf(w, "data: {\"type\":\"done\",\"data\":{}}\n\n")
		}
	}))
	defer server.Close()

	r := NewRunner(DefaultConfig())
	r.config.BaseURL = server.URL
	r.config.StepRetries = 2
	r.config.RetryOnRateLimit = true
	r.config.RateLimitCooldown = 1 * time.Millisecond // keep the test fast

	got := r.executeStep(Step{Name: "Retry Step", Prompt: "Go"}, "")

	// Eventually succeeds, with the retry recorded and attributed.
	assert.True(t, got.Success)
	assert.Greater(t, got.Retries, 0)
	assert.Contains(t, got.RetryNotes, "rate_limit")
}
// TestRunner_WriteReport writes a scenario report into a temp dir and checks
// the path naming convention and that the report contains the scenario name.
func TestRunner_WriteReport(t *testing.T) {
	dir := t.TempDir()

	cfg := DefaultConfig()
	cfg.ReportDir = dir
	cfg.Model = "test-model"
	r := NewRunner(cfg)

	scenario := ScenarioResult{
		ScenarioName: "Test Report Scenario",
		Passed:       true,
		Steps:        []StepResult{{StepName: "Step 1", Success: true}},
	}

	path, err := r.writeReport(scenario)
	require.NoError(t, err)

	// Path is under the report dir and follows the slug naming scheme.
	assert.Contains(t, path, dir)
	assert.Contains(t, path, "eval-test-report-scenario-test-model")
	assert.FileExists(t, path)

	// The written report mentions the scenario by name.
	data, err := os.ReadFile(path)
	require.NoError(t, err)
	assert.Contains(t, string(data), "Test Report Scenario")
}
func TestEnvHelpers(t *testing.T) {
// Bool
os.Setenv("TEST_BOOL_TRUE", "true")
os.Setenv("TEST_BOOL_1", "1")
os.Setenv("TEST_BOOL_FALSE", "false")
defer os.Unsetenv("TEST_BOOL_TRUE")
defer os.Unsetenv("TEST_BOOL_1")
defer os.Unsetenv("TEST_BOOL_FALSE")
val, ok := envBool("TEST_BOOL_TRUE")
assert.True(t, ok)
assert.True(t, val)
val, ok = envBool("TEST_BOOL_1")
assert.True(t, ok)
assert.True(t, val)
val, ok = envBool("TEST_BOOL_FALSE")
assert.True(t, ok)
assert.False(t, val)
_, ok = envBool("NON_EXISTENT")
assert.False(t, ok)
// Int
os.Setenv("TEST_INT", "123")
os.Setenv("TEST_INT_BAD", "abc")
defer os.Unsetenv("TEST_INT")
defer os.Unsetenv("TEST_INT_BAD")
iVal, ok := envInt("TEST_INT")
assert.True(t, ok)
assert.Equal(t, 123, iVal)
_, ok = envInt("TEST_INT_BAD")
assert.False(t, ok)
// Float
os.Setenv("TEST_FLOAT", "123.456")
defer os.Unsetenv("TEST_FLOAT")
fVal, ok := envFloat("TEST_FLOAT")
assert.True(t, ok)
assert.InDelta(t, 123.456, fVal, 0.001)
// String
os.Setenv("TEST_STRING", "val")
defer os.Unsetenv("TEST_STRING")
sVal, ok := envString("TEST_STRING")
assert.True(t, ok)
assert.Equal(t, "val", sVal)
}
func TestApplyEvalEnvOverrides_Comprehensive(t *testing.T) {
// Set all env vars
envVars := map[string]string{
"EVAL_HTTP_TIMEOUT": "60",
"EVAL_STEP_RETRIES": "3",
"EVAL_RETRY_ON_PHANTOM": "true",
"EVAL_RETRY_ON_EXPLICIT_TOOL": "true",
"EVAL_RETRY_ON_STREAM_FAILURE": "false",
"EVAL_RETRY_ON_EMPTY_RESPONSE": "false",
"EVAL_RETRY_ON_TOOL_ERRORS": "true",
"EVAL_RETRY_ON_RATE_LIMIT": "true",
"EVAL_RATE_LIMIT_COOLDOWN": "5",
"EVAL_PREFLIGHT": "true",
"EVAL_PREFLIGHT_TIMEOUT": "10",
"EVAL_MODEL": "gpt-4",
"EVAL_REPORT_DIR": "/tmp/reports",
}
for k, v := range envVars {
os.Setenv(k, v)
defer os.Unsetenv(k)
}
cfg := Config{} // Empty config
applyEvalEnvOverrides(&cfg)
assert.Equal(t, 60*time.Second, cfg.RequestTimeout)
assert.Equal(t, 3, cfg.StepRetries)
assert.True(t, cfg.RetryOnPhantom)
assert.True(t, cfg.RetryOnExplicitTool)
assert.False(t, cfg.RetryOnStreamFailure)
assert.False(t, cfg.RetryOnEmptyResponse)
assert.True(t, cfg.RetryOnToolErrors)
assert.True(t, cfg.RetryOnRateLimit)
assert.Equal(t, 5*time.Second, cfg.RateLimitCooldown)
assert.True(t, cfg.Preflight)
assert.Equal(t, 10*time.Second, cfg.PreflightTimeout)
assert.Equal(t, "gpt-4", cfg.Model)
assert.Equal(t, "/tmp/reports", cfg.ReportDir)
}
// TestRunner_RunPreflight verifies a preflight against a responsive mock
// server succeeds and captures non-empty content.
func TestRunner_RunPreflight(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, "data: {\"type\":\"content\",\"data\":{\"text\":\"Hello\"}}\n\n")
		fmt.Fprint(w, "data: {\"type\":\"done\",\"data\":{}}\n\n")
	}))
	defer server.Close()

	cfg := DefaultConfig()
	cfg.BaseURL = server.URL
	cfg.PreflightTimeout = 1 * time.Second

	got := NewRunner(cfg).runPreflight()

	assert.True(t, got.Success)
	assert.Equal(t, "Preflight", got.StepName)
	assert.NotEmpty(t, got.Content)
}
// TestRunner_RunPreflight_Fail verifies that a stream which completes without
// any content is treated as a preflight failure.
func TestRunner_RunPreflight_Fail(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Well-formed stream, but no content events: preflight must reject it.
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, "data: {\"type\":\"done\",\"data\":{}}\n\n")
	}))
	defer server.Close()

	cfg := DefaultConfig()
	cfg.BaseURL = server.URL

	got := NewRunner(cfg).runPreflight()

	assert.False(t, got.Success)
	assert.Error(t, got.Error)
	assert.Contains(t, got.Error.Error(), "preflight returned empty response")
}