Pulse/internal/ai/patrol.go

// patrol.go defines the core types, interfaces, and struct for the PatrolService.
//
// Architecture:
//
//	Scheduled/Event Trigger
//	        │
//	        ▼
//	buildSeedContextState()  ── patrol runtime state
//	        │
//	        ▼
//	runAIAnalysisState() ── agentic LLM loop with tools
//	        │
//	        ▼
//	recordFinding()     ── dedup, threshold validation
//	        │
//	        ├──▶ MaybeInvestigateFinding()  ── autonomous investigation
//	        │         │
//	        │         ▼
//	        │    parseInvestigationSummary() ── extract PROPOSED_FIX
//	        │         │
//	        │         ▼
//	        │    approval / execution / verification
//	        │
//	        └──▶ generateRemediationPlan()  ── template-based fix plan
//
// Safety: All command execution goes through internal/ai/safety for
// blocked command detection. Investigation guardrails and remediation
// engine both delegate to the shared safety package.
//
// The patrol system is split across these files:
//   - patrol.go: types, interfaces, PatrolService struct, constructor
//   - patrol_init.go: configuration, setters, getters, dependency injection
//   - patrol_run.go: lifecycle, scheduling, streaming, alert resolution
//   - patrol_ai.go: LLM interaction, seed context, prompt construction
//   - patrol_findings.go: finding lifecycle, remediation planning, investigation
//   - patrol_signals.go: deterministic signal detection from tool call outputs
//   - patrol_metrics.go: metrics collection for patrol operations
package ai

import (
	"encoding/json"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/rcourtman/pulse-go-rewrite/internal/ai/baseline"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/circuit"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/knowledge"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/memory"
	"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
	"github.com/rcourtman/pulse-go-rewrite/internal/relay"
	"github.com/rcourtman/pulse-go-rewrite/internal/servicediscovery"
	"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
	"github.com/rcourtman/pulse-go-rewrite/pkg/aicontracts"
)

// ThresholdProvider provides user-configured alert thresholds for patrol to use
type ThresholdProvider interface {
	// GetNodeCPUThreshold returns the CPU alert trigger threshold for nodes (0-100%)
	GetNodeCPUThreshold() float64
	// GetNodeMemoryThreshold returns the memory alert trigger threshold for nodes (0-100%)
	GetNodeMemoryThreshold() float64
	// GetGuestMemoryThreshold returns the memory alert trigger threshold for guests (0-100%)
	GetGuestMemoryThreshold() float64
	// GetGuestDiskThreshold returns the disk alert trigger threshold for guests (0-100%)
	GetGuestDiskThreshold() float64
	// GetStorageThreshold returns the usage alert trigger threshold for storage (0-100%)
	GetStorageThreshold() float64
}

// AlertResolver provides the ability to review and resolve alerts
type AlertResolver interface {
	// GetActiveAlerts returns all currently active alerts
	GetActiveAlerts() []AlertInfo
	// ResolveAlert clears an active alert, returns true if successful
	ResolveAlert(alertID string) bool
}

// PatrolStatus represents the current state of the patrol service
type PatrolRuntimeState string

const (
	PatrolRuntimeStateUnavailable PatrolRuntimeState = "unavailable"
	PatrolRuntimeStateDisabled    PatrolRuntimeState = "disabled"
	PatrolRuntimeStateRunning     PatrolRuntimeState = "running"
	PatrolRuntimeStateBlocked     PatrolRuntimeState = "blocked"
	PatrolRuntimeStateActive      PatrolRuntimeState = "active"
)

const (
	patrolQuickstartCreditsExhaustedReason   = "Quickstart credits exhausted. Connect your API key to continue using AI Patrol."
	patrolQuickstartActivationRequiredReason = "Activate this install or start a trial to use AI Patrol quickstart. Otherwise connect your API key."
	patrolQuickstartUnavailableReason        = "Quickstart credits require internet access. Connect your API key for offline AI Patrol."
)

type PatrolStatus struct {
	RuntimeState     PatrolRuntimeState `json:"runtime_state"`
	Running          bool               `json:"running"`
	Enabled          bool               `json:"enabled"`
	LastPatrolAt     *time.Time         `json:"last_patrol_at,omitempty"`   // Last completed full patrol
	LastActivityAt   *time.Time         `json:"last_activity_at,omitempty"` // Last completed Patrol activity of any kind
	TriggerStatus    *TriggerStatus     `json:"trigger_status,omitempty"`
	NextPatrolAt     *time.Time         `json:"next_patrol_at,omitempty"`
	LastDuration     time.Duration      `json:"last_duration_ms"`
	ResourcesChecked int                `json:"resources_checked"`
	FindingsCount    int                `json:"findings_count"`
	ErrorCount       int                `json:"error_count"`
	Healthy          bool               `json:"healthy"`
	IntervalMs       int64              `json:"interval_ms"` // Patrol interval in milliseconds
	BlockedReason    string             `json:"blocked_reason,omitempty"`
	BlockedAt        *time.Time         `json:"blocked_at,omitempty"`
	// Quickstart credit info for the frontend
	QuickstartCreditsRemaining int  `json:"quickstart_credits_remaining"`
	QuickstartCreditsTotal     int  `json:"quickstart_credits_total"`
	UsingQuickstart            bool `json:"using_quickstart"`
}

// PatrolRunRecord represents a single patrol check run
type PatrolRunRecord struct {
	ID                        string        `json:"id"`
	StartedAt                 time.Time     `json:"started_at"`
	CompletedAt               time.Time     `json:"completed_at"`
	Duration                  time.Duration `json:"-"`
	DurationMs                int64         `json:"duration_ms"`
	Type                      string        `json:"type"` // Always "patrol" now (kept for backwards compat)
	TriggerReason             string        `json:"trigger_reason,omitempty"`
	ScopeResourceIDs          []string      `json:"scope_resource_ids,omitempty"`
	EffectiveScopeResourceIDs []string      `json:"effective_scope_resource_ids,omitempty"`
	ScopeResourceTypes        []string      `json:"scope_resource_types,omitempty"`
	ScopeContext              string        `json:"scope_context,omitempty"`
	AlertIdentifier           string        `json:"alert_identifier,omitempty"`
	FindingID                 string        `json:"finding_id,omitempty"`
	ResourcesChecked          int           `json:"resources_checked"`
	// Breakdown by resource type
	NodesChecked      int `json:"nodes_checked"`
	GuestsChecked     int `json:"guests_checked"`
	DockerChecked     int `json:"docker_checked"`
	StorageChecked    int `json:"storage_checked"`
	HostsChecked      int `json:"hosts_checked"`
	TrueNASChecked    int `json:"truenas_checked"`
	PBSChecked        int `json:"pbs_checked"`
	PMGChecked        int `json:"pmg_checked"`
	KubernetesChecked int `json:"kubernetes_checked"`
	// Findings from this run
	NewFindings      int      `json:"new_findings"`
	ExistingFindings int      `json:"existing_findings"`
	RejectedFindings int      `json:"rejected_findings"`
	ResolvedFindings int      `json:"resolved_findings"`
	AutoFixCount     int      `json:"auto_fix_count,omitempty"`
	FindingsSummary  string   `json:"findings_summary"` // e.g., "All healthy" or "2 warnings, 1 critical"
	FindingIDs       []string `json:"finding_ids"`      // IDs of findings from this run
	ErrorCount       int      `json:"error_count"`
	Status           string   `json:"status"` // "healthy", "issues_found", "error"
	// Triage stats
	TriageFlags      int  `json:"triage_flags"`                 // Number of deterministic flags found
	TriageSkippedLLM bool `json:"triage_skipped_llm,omitempty"` // True if LLM was skipped (quiet infra)
	// AI Analysis details
	AIAnalysis   string `json:"ai_analysis,omitempty"`   // The AI's raw response/analysis
	InputTokens  int    `json:"input_tokens,omitempty"`  // Tokens sent to AI
	OutputTokens int    `json:"output_tokens,omitempty"` // Tokens received from AI
	// Tool call traces
	ToolCalls     []ToolCallRecord `json:"tool_calls,omitempty"`
	ToolCallCount int              `json:"tool_call_count"`
}

type patrolRunRecordJSON struct {
	ID                        string           `json:"id"`
	StartedAt                 time.Time        `json:"started_at"`
	CompletedAt               time.Time        `json:"completed_at"`
	DurationMs                int64            `json:"duration_ms"`
	Type                      string           `json:"type"`
	TriggerReason             string           `json:"trigger_reason,omitempty"`
	ScopeResourceIDs          *[]string        `json:"scope_resource_ids,omitempty"`
	EffectiveScopeResourceIDs *[]string        `json:"effective_scope_resource_ids,omitempty"`
	ScopeResourceTypes        *[]string        `json:"scope_resource_types,omitempty"`
	ScopeContext              string           `json:"scope_context,omitempty"`
	AlertIdentifier           string           `json:"alert_identifier,omitempty"`
	FindingID                 string           `json:"finding_id,omitempty"`
	ResourcesChecked          int              `json:"resources_checked"`
	NodesChecked              int              `json:"nodes_checked"`
	GuestsChecked             int              `json:"guests_checked"`
	DockerChecked             int              `json:"docker_checked"`
	StorageChecked            int              `json:"storage_checked"`
	HostsChecked              int              `json:"hosts_checked"`
	TrueNASChecked            int              `json:"truenas_checked"`
	PBSChecked                int              `json:"pbs_checked"`
	PMGChecked                int              `json:"pmg_checked"`
	KubernetesChecked         int              `json:"kubernetes_checked"`
	NewFindings               int              `json:"new_findings"`
	ExistingFindings          int              `json:"existing_findings"`
	RejectedFindings          int              `json:"rejected_findings"`
	ResolvedFindings          int              `json:"resolved_findings"`
	AutoFixCount              int              `json:"auto_fix_count,omitempty"`
	FindingsSummary           string           `json:"findings_summary"`
	FindingIDs                []string         `json:"finding_ids"`
	ErrorCount                int              `json:"error_count"`
	Status                    string           `json:"status"`
	TriageFlags               int              `json:"triage_flags"`
	TriageSkippedLLM          bool             `json:"triage_skipped_llm,omitempty"`
	AIAnalysis                string           `json:"ai_analysis,omitempty"`
	InputTokens               int              `json:"input_tokens,omitempty"`
	OutputTokens              int              `json:"output_tokens,omitempty"`
	ToolCalls                 []ToolCallRecord `json:"tool_calls,omitempty"`
	ToolCallCount             int              `json:"tool_call_count"`
}

func canonicalPatrolAlertIdentifier(alertIdentifier string) string {
	return strings.TrimSpace(alertIdentifier)
}

func marshalOptionalPatrolStringSlice(values []string) *[]string {
	if values == nil {
		return nil
	}
	cloned := append([]string{}, values...)
	return &cloned
}

func unmarshalOptionalPatrolStringSlice(values *[]string) []string {
	if values == nil {
		return nil
	}
	return append([]string{}, (*values)...)
}

func canonicalPatrolFindingIDs(ids []string) []string {
	if ids == nil {
		return []string{}
	}
	return append([]string{}, ids...)
}

func normalizePatrolRunRecord(record PatrolRunRecord) PatrolRunRecord {
	record.AlertIdentifier = canonicalPatrolAlertIdentifier(record.AlertIdentifier)
	record.FindingIDs = canonicalPatrolFindingIDs(record.FindingIDs)
	return record
}

func (r PatrolRunRecord) MarshalJSON() ([]byte, error) {
	normalized := normalizePatrolRunRecord(r)
	return json.Marshal(patrolRunRecordJSON{
		ID:                        normalized.ID,
		StartedAt:                 normalized.StartedAt,
		CompletedAt:               normalized.CompletedAt,
		DurationMs:                normalized.DurationMs,
		Type:                      normalized.Type,
		TriggerReason:             normalized.TriggerReason,
		ScopeResourceIDs:          marshalOptionalPatrolStringSlice(normalized.ScopeResourceIDs),
		EffectiveScopeResourceIDs: marshalOptionalPatrolStringSlice(normalized.EffectiveScopeResourceIDs),
		ScopeResourceTypes:        marshalOptionalPatrolStringSlice(normalized.ScopeResourceTypes),
		ScopeContext:              normalized.ScopeContext,
		AlertIdentifier:           normalized.AlertIdentifier,
		FindingID:                 normalized.FindingID,
		ResourcesChecked:          normalized.ResourcesChecked,
		NodesChecked:              normalized.NodesChecked,
		GuestsChecked:             normalized.GuestsChecked,
		DockerChecked:             normalized.DockerChecked,
		StorageChecked:            normalized.StorageChecked,
		HostsChecked:              normalized.HostsChecked,
		TrueNASChecked:            normalized.TrueNASChecked,
		PBSChecked:                normalized.PBSChecked,
		PMGChecked:                normalized.PMGChecked,
		KubernetesChecked:         normalized.KubernetesChecked,
		NewFindings:               normalized.NewFindings,
		ExistingFindings:          normalized.ExistingFindings,
		RejectedFindings:          normalized.RejectedFindings,
		ResolvedFindings:          normalized.ResolvedFindings,
		AutoFixCount:              normalized.AutoFixCount,
		FindingsSummary:           normalized.FindingsSummary,
		FindingIDs:                normalized.FindingIDs,
		ErrorCount:                normalized.ErrorCount,
		Status:                    normalized.Status,
		TriageFlags:               normalized.TriageFlags,
		TriageSkippedLLM:          normalized.TriageSkippedLLM,
		AIAnalysis:                normalized.AIAnalysis,
		InputTokens:               normalized.InputTokens,
		OutputTokens:              normalized.OutputTokens,
		ToolCalls:                 normalized.ToolCalls,
		ToolCallCount:             normalized.ToolCallCount,
	})
}

func (r *PatrolRunRecord) UnmarshalJSON(data []byte) error {
	var payload patrolRunRecordJSON
	if err := json.Unmarshal(data, &payload); err != nil {
		return err
	}

	*r = normalizePatrolRunRecord(PatrolRunRecord{
		ID:                        payload.ID,
		StartedAt:                 payload.StartedAt,
		CompletedAt:               payload.CompletedAt,
		Duration:                  time.Duration(payload.DurationMs) * time.Millisecond,
		DurationMs:                payload.DurationMs,
		Type:                      payload.Type,
		TriggerReason:             payload.TriggerReason,
		ScopeResourceIDs:          unmarshalOptionalPatrolStringSlice(payload.ScopeResourceIDs),
		EffectiveScopeResourceIDs: unmarshalOptionalPatrolStringSlice(payload.EffectiveScopeResourceIDs),
		ScopeResourceTypes:        unmarshalOptionalPatrolStringSlice(payload.ScopeResourceTypes),
		ScopeContext:              payload.ScopeContext,
		AlertIdentifier:           canonicalPatrolAlertIdentifier(payload.AlertIdentifier),
		FindingID:                 payload.FindingID,
		ResourcesChecked:          payload.ResourcesChecked,
		NodesChecked:              payload.NodesChecked,
		GuestsChecked:             payload.GuestsChecked,
		DockerChecked:             payload.DockerChecked,
		StorageChecked:            payload.StorageChecked,
		HostsChecked:              payload.HostsChecked,
		TrueNASChecked:            payload.TrueNASChecked,
		PBSChecked:                payload.PBSChecked,
		PMGChecked:                payload.PMGChecked,
		KubernetesChecked:         payload.KubernetesChecked,
		NewFindings:               payload.NewFindings,
		ExistingFindings:          payload.ExistingFindings,
		RejectedFindings:          payload.RejectedFindings,
		ResolvedFindings:          payload.ResolvedFindings,
		AutoFixCount:              payload.AutoFixCount,
		FindingsSummary:           payload.FindingsSummary,
		FindingIDs:                payload.FindingIDs,
		ErrorCount:                payload.ErrorCount,
		Status:                    payload.Status,
		TriageFlags:               payload.TriageFlags,
		TriageSkippedLLM:          payload.TriageSkippedLLM,
		AIAnalysis:                payload.AIAnalysis,
		InputTokens:               payload.InputTokens,
		OutputTokens:              payload.OutputTokens,
		ToolCalls:                 payload.ToolCalls,
		ToolCallCount:             payload.ToolCallCount,
	})
	return nil
}

// MaxPatrolRunHistory is the maximum number of patrol runs to keep in history
const MaxPatrolRunHistory = 100

const (
	MaxToolInputSize   = 1024 // max chars for tool input in persisted record
	MaxToolOutputSize  = 2048 // max chars for tool output in persisted record
	MaxToolCallsPerRun = 100  // max tool calls stored per run (oldest dropped)
)

// LearningProvider provides learned preferences for patrol context
type LearningProvider interface {
	// FormatForContext returns learned preferences formatted for AI prompt injection
	FormatForContext() string
}

// ProxmoxEventProvider provides recent Proxmox events for patrol context
type ProxmoxEventProvider interface {
	// FormatForPatrol formats recent events for AI patrol context
	FormatForPatrol(duration time.Duration) string
}

// ForecastProvider provides trend forecasts for patrol context
type ForecastProvider interface {
	// FormatKeyForecasts returns formatted forecasts for resources with concerning trends
	FormatKeyForecasts() string
}

// UnifiedFindingCallback is called when patrol creates a new finding
// It allows the unified store to receive patrol findings in addition to alerts
type UnifiedFindingCallback func(f *Finding) bool

// PushNotifyCallback is called to send a push notification through the relay.
type PushNotifyCallback func(notification relay.PushNotificationPayload)

// InvestigationOrchestrator is the interface for autonomous investigation of findings.
// Re-exported from pkg/aicontracts for backwards compatibility.
type InvestigationOrchestrator = aicontracts.InvestigationOrchestrator

// InvestigationStoreMaintainer is an optional interface for orchestrators that
// expose their investigation store for periodic maintenance.
// Re-exported from pkg/aicontracts for backwards compatibility.
type InvestigationStoreMaintainer = aicontracts.InvestigationStoreMaintainer

// InvestigationFinding is the shared type used by patrol and investigation orchestration.
type InvestigationFinding = aicontracts.Finding

// InvestigationSession represents the result of an investigation.
// Re-exported from pkg/aicontracts for backwards compatibility.
type InvestigationSession = aicontracts.InvestigationSession

// InvestigationFix represents a proposed remediation action.
// Re-exported from pkg/aicontracts for backwards compatibility.
type InvestigationFix = aicontracts.Fix

// PatrolService runs background AI analysis of infrastructure
type PatrolService struct {
	mu sync.RWMutex

	aiService           *Service
	stateProvider       StateProvider
	thresholdProvider   ThresholdProvider
	config              PatrolConfig
	findings            *FindingsStore
	knowledgeStore      *knowledge.Store        // For per-resource notes in patrol context
	discoveryStore      *servicediscovery.Store // For AI-discovered infrastructure context
	guestProber         GuestProber             // For pre-patrol guest reachability checks
	metricsHistory      MetricsHistoryProvider  // For trend analysis and predictions
	baselineStore       *baseline.Store         // For anomaly detection via learned baselines
	changeDetector      *ChangeDetector         // For tracking infrastructure changes
	remediationLog      *RemediationLog         // For tracking remediation actions
	patternDetector     *PatternDetector        // For failure prediction from historical patterns
	correlationDetector *CorrelationDetector    // For multi-resource correlation
	incidentStore       *memory.IncidentStore   // For incident timeline capture
	alertResolver       AlertResolver           // For AI-based alert resolution

	// Unified resource provider — reads physical disks, Ceph, etc. from canonical model
	unifiedResourceProvider UnifiedResourceProvider
	// ReadState provides typed read-only views over resource state (VMs, nodes, hosts, etc.).
	// This is injected separately from stateProvider since stateProvider also contains
	// non-resource telemetry (alerts, backups, connection health) that isn't modeled as resources yet.
	readState unifiedresources.ReadState

	// New AI intelligence providers (Phase 6)
	learningProvider     LearningProvider     // For learned preferences from user feedback
	proxmoxEventProvider ProxmoxEventProvider // For recent Proxmox operations
	forecastProvider     ForecastProvider     // For trend forecasts

	// Event-driven patrol triggers (Phase 7)
	triggerManager     *TriggerManager          // For event-driven patrol scheduling
	eventTriggerConfig PatrolEventTriggerConfig // Canonical scoped patrol trigger preferences

	// Unified intelligence facade - aggregates all subsystems for unified view
	intelligence *Intelligence

	// Circuit breaker for resilient AI API calls
	circuitBreaker *circuit.Breaker

	// Remediation engine for generating fix plans from findings
	remediationEngine aicontracts.RemediationEngine

	// Investigation orchestrator for autonomous investigation of findings
	investigationOrchestrator InvestigationOrchestrator
	investigationWg           sync.WaitGroup // Tracks in-flight investigation goroutines

	// Unified findings callback - pushes findings to unified store
	unifiedFindingCallback UnifiedFindingCallback
	// Unified resolver callback - marks findings resolved in unified store
	unifiedFindingResolver func(findingID string)

	// Push notification callback - sends push via relay to mobile devices
	pushNotifyCallback PushNotifyCallback

	// Cached thresholds (recalculated when thresholdProvider changes)
	thresholds    PatrolThresholds
	proactiveMode bool // When true, warn before thresholds; when false, use exact thresholds

	// Runtime state
	running           bool
	runInProgress     bool
	runStartedAt      time.Time
	stopCh            chan struct{}
	configChanged     chan struct{} // Signal when config changes to reset ticker
	lastFullPatrol    time.Time
	lastActivity      time.Time
	lastDuration      time.Duration
	resourcesChecked  int
	errorCount        int
	lastBlockedReason string
	lastBlockedAt     time.Time
	nextScheduledAt   time.Time // Tracks actual next patrol time (accounts for ticker resets)

	// Patrol run history with persistence support
	runHistoryStore *PatrolRunHistoryStore

	// Quickstart credit manager for free hosted patrol runs
	quickstartCredits QuickstartCreditManager

	// Ad-hoc trigger channel for event-driven patrols (alert driven)
	adHocTrigger chan *alerts.Alert

	// Live streaming support
	streamMu          sync.RWMutex
	streamSubscribers map[chan PatrolStreamEvent]*streamSubscriber
	currentOutput     streamOutputBuffer // Tail buffer for current streaming output
	streamPhase       string             // "idle", "analyzing", "complete"
	streamRunID       string             // Identifies the current streamed run (best-effort)
	streamSeq         int64              // Monotonic sequence for SSE events within streamRunID
	streamCurrentTool string             // Last observed tool name (best-effort)
	streamEvents      []PatrolStreamEvent
}

const patrolStreamMaxOutputBytes = 64 * 1024

// streamOutputBuffer retains only the most recent bytes written, to cap memory usage
// while still allowing late joiners to get a useful snapshot.
type streamOutputBuffer struct {
	buf       []byte
	truncated bool
}

func (b *streamOutputBuffer) Reset() {
	// Keep capacity bounded so long-running streams can't retain a large backing array.
	if cap(b.buf) > patrolStreamMaxOutputBytes {
		b.buf = make([]byte, 0, patrolStreamMaxOutputBytes)
	} else {
		b.buf = b.buf[:0]
	}
	b.truncated = false
}

func (b *streamOutputBuffer) Len() int             { return len(b.buf) }
func (b *streamOutputBuffer) String() string       { return string(b.buf) }
func (b *streamOutputBuffer) Truncated() bool      { return b.truncated }
func (b *streamOutputBuffer) WriteString(s string) { b.appendString(s) }

func (b *streamOutputBuffer) appendString(s string) {
	if len(s) == 0 {
		return
	}

	max := patrolStreamMaxOutputBytes
	if len(s) >= max {
		// Keep only the tail of the incoming chunk.
		b.buf = append(b.buf[:0], s[len(s)-max:]...)
		b.truncated = true
		b.normalizeUTF8Start()
		b.shrinkCapIfNeeded()
		return
	}

	// Keep as much of existing tail as possible.
	needKeep := max - len(s)
	if len(b.buf) > needKeep {
		b.buf = append(b.buf[:0], b.buf[len(b.buf)-needKeep:]...)
		b.truncated = true
	}
	b.buf = append(b.buf, s...)
	if len(b.buf) > max {
		b.buf = b.buf[len(b.buf)-max:]
		b.truncated = true
	}
	b.normalizeUTF8Start()
	b.shrinkCapIfNeeded()
}

func (b *streamOutputBuffer) normalizeUTF8Start() {
	// If we truncated by bytes, we may have cut in the middle of a UTF-8 rune.
	// Drop leading continuation bytes so the string starts on a rune boundary.
	for len(b.buf) > 0 && (b.buf[0]&0xC0) == 0x80 {
		b.buf = b.buf[1:]
		b.truncated = true
	}
}

func (b *streamOutputBuffer) shrinkCapIfNeeded() {
	if cap(b.buf) <= patrolStreamMaxOutputBytes {
		return
	}
	tmp := make([]byte, len(b.buf), patrolStreamMaxOutputBytes)
	copy(tmp, b.buf)
	b.buf = tmp
}

// ToolCallRecord captures a single tool invocation during a patrol run.
type ToolCallRecord struct {
	ID        string `json:"id"`
	ToolName  string `json:"tool_name"`
	Input     string `json:"input"`
	Output    string `json:"output"`
	Success   bool   `json:"success"`
	StartTime int64  `json:"start_time"`
	EndTime   int64  `json:"end_time"`
	Duration  int64  `json:"duration_ms"`
}

// streamSubscriber wraps a stream channel with an atomic close flag
// to prevent double-close panics when both broadcast and unsubscribe race.
type streamSubscriber struct {
	ch     chan PatrolStreamEvent
	closed atomic.Bool
	// Consecutive times we couldn't deliver to this subscriber because its channel
	// was full. Used to tolerate short bursts without immediately disconnecting.
	fullCount int
}

// PatrolStreamEvent represents a streaming update from the patrol
type PatrolStreamEvent struct {
	// Meta
	// Seq is suitable to be used as an SSE "id:" for Last-Event-ID, but replay is best-effort.
	RunID string `json:"run_id,omitempty"`
	Seq   int64  `json:"seq,omitempty"`
	TsMs  int64  `json:"ts_ms,omitempty"`
	// If this is a synthetic snapshot/resync event, why it was emitted.
	// Examples: "late_joiner", "stale_last_event_id".
	ResyncReason string `json:"resync_reason,omitempty"`
	BufferStart  int64  `json:"buffer_start_seq,omitempty"`
	BufferEnd    int64  `json:"buffer_end_seq,omitempty"`
	// True when the snapshot content has been truncated due to the tail buffer.
	ContentTruncated *bool `json:"content_truncated,omitempty"`

	// Payload
	// Known types include: "snapshot", "start", "content", "phase", "thinking",
	// "complete", "error", "tool_start", "tool_end".
	Type    string `json:"type"`
	Content string `json:"content,omitempty"`
	Phase   string `json:"phase,omitempty"`  // Current phase description
	Tokens  int    `json:"tokens,omitempty"` // Token count so far
	// Tool event fields (present only for tool_start/tool_end)
	ToolID       string `json:"tool_id,omitempty"`
	ToolName     string `json:"tool_name,omitempty"`
	ToolInput    string `json:"tool_input,omitempty"`
	ToolRawInput string `json:"tool_raw_input,omitempty"`
	ToolOutput   string `json:"tool_output,omitempty"`
	ToolSuccess  *bool  `json:"tool_success,omitempty"` // pointer so omitempty works with false
}

// NewPatrolService creates a new patrol service
func NewPatrolService(aiService *Service, stateProvider StateProvider) *PatrolService {
	return &PatrolService{
		aiService:     aiService,
		stateProvider: stateProvider,
		config:        DefaultPatrolConfig(),
		findings:      NewFindingsStore(),
		thresholds:    DefaultPatrolThresholds(),
		eventTriggerConfig: PatrolEventTriggerConfig{
			AlertTriggersEnabled:   true,
			AnomalyTriggersEnabled: true,
		},
		stopCh:            make(chan struct{}),
		runHistoryStore:   NewPatrolRunHistoryStore(MaxPatrolRunHistory),
		streamSubscribers: make(map[chan PatrolStreamEvent]*streamSubscriber),
		streamPhase:       "idle",
		adHocTrigger:      make(chan *alerts.Alert, 10), // Buffer triggers
	}
}