Pulse/internal/servicediscovery/service.go

// Package servicediscovery provides infrastructure discovery capabilities.
// It discovers services, versions, configurations, and CLI access methods
// for VMs, LXCs, Docker containers, Kubernetes pods, and hosts.
package servicediscovery
import (
"context"
"encoding/json"
"fmt"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/rs/zerolog/log"
)
// sensitiveKeyPatterns defines patterns that indicate a label/env key might contain secrets.
// These patterns are case-insensitive and match if any part of the key contains them.
var sensitiveKeyPatterns = []string{
"password", "passwd", "pwd",
"secret",
"key", "apikey", "api_key",
"token",
"credential", "cred",
"auth",
"private",
"cert",
}
// filterSensitiveLabels redacts label values whose keys look sensitive.
// It returns a new map with sensitive values replaced with "[REDACTED]".
// Keys are checked case-insensitively for sensitive patterns.
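// A minimal sketch of the behaviour (hypothetical label values):
//
//	filterSensitiveLabels(map[string]string{"app": "web", "db_password": "hunter2"})
//	// => map[app:web db_password:[REDACTED]]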
func filterSensitiveLabels(labels map[string]string) map[string]string {
if labels == nil {
return nil
}
filtered := make(map[string]string, len(labels))
redactedCount := 0
for key, value := range labels {
keyLower := strings.ToLower(key)
isSensitive := false
for _, pattern := range sensitiveKeyPatterns {
if strings.Contains(keyLower, pattern) {
isSensitive = true
break
}
}
if isSensitive {
filtered[key] = "[REDACTED]"
redactedCount++
} else {
filtered[key] = value
}
}
if redactedCount > 0 {
log.Debug().
Int("redacted_count", redactedCount).
Int("total_labels", len(labels)).
Msg("Redacted sensitive labels before AI analysis")
}
return filtered
}
// StateProvider provides access to the current infrastructure state.
type StateProvider interface {
GetState() StateSnapshot
}
// StateSnapshot represents the infrastructure state. This mirrors models.StateSnapshot
// to avoid circular dependencies.
type StateSnapshot struct {
VMs []VM
Containers []Container
DockerHosts []DockerHost
KubernetesClusters []KubernetesCluster
Hosts []Host
Nodes []Node
}
// Node represents a Proxmox VE node.
type Node struct {
ID string
Name string
LinkedHostAgentID string
}
// Host represents a host system (via host-agent).
type Host struct {
ID string
Hostname string
DisplayName string
Platform string // e.g., "linux", "darwin", "windows"
OSName string // e.g., "Unraid", "Ubuntu", "Debian"
OSVersion string
KernelVersion string
Architecture string // e.g., "amd64", "arm64"
CPUCount int
Status string
Tags []string
}
// VM represents a virtual machine.
type VM struct {
VMID int
Name string
Node string
Status string
Instance string
// Additional metadata for fingerprinting
CPUs int // Number of CPU cores
MaxMemory uint64 // Max memory in bytes
MaxDisk uint64 // Max disk in bytes
Tags []string // User-defined tags
OSName string // Detected OS name
OSVersion string // OS version string
IPAddresses []string // IP addresses assigned to the VM
Template bool // True if this is a template
}
// Container represents an LXC container.
type Container struct {
VMID int
Name string
Node string
Status string
Instance string
// Additional metadata for fingerprinting
CPUs int // Number of CPU cores
MaxMemory uint64 // Max memory in bytes
MaxDisk uint64 // Max disk in bytes
Tags []string // User-defined tags
OSTemplate string // Template or OCI image used
OSName string // Detected OS name
IsOCI bool // True if OCI container (Proxmox 9.1+)
IPAddresses []string // IP addresses assigned to the container
Template bool // True if this is a template
}
// DockerHost represents a Docker host.
type DockerHost struct {
AgentID string
Hostname string
Containers []DockerContainer
}
// DockerContainer represents a Docker container.
type DockerContainer struct {
ID string
Name string
Image string
Status string
Ports []DockerPort
Labels map[string]string
Mounts []DockerMount
}
// DockerPort represents a port mapping.
type DockerPort struct {
PublicPort int
PrivatePort int
Protocol string
}
// DockerMount represents a mount point.
type DockerMount struct {
Source string
Destination string
}
// KubernetesCluster represents a Kubernetes cluster.
type KubernetesCluster struct {
ID string
Name string
AgentID string
Status string
Pods []KubernetesPod
}
// KubernetesPod represents a Kubernetes pod.
type KubernetesPod struct {
UID string
Name string
Namespace string
NodeName string
Phase string
Labels map[string]string
OwnerKind string // e.g., "Deployment", "StatefulSet", "DaemonSet"
OwnerName string
Containers []KubernetesPodContainer
}
// KubernetesPodContainer represents a container within a Kubernetes pod.
type KubernetesPodContainer struct {
Name string
Image string
Ready bool
RestartCount int32
State string // e.g., "running", "waiting", "terminated"
}
// AIAnalyzer provides AI analysis capabilities for discovery.
type AIAnalyzer interface {
AnalyzeForDiscovery(ctx context.Context, prompt string) (string, error)
}
// WSMessage represents a WebSocket message for broadcasting.
type WSMessage struct {
Type string `json:"type"`
Data interface{} `json:"data"`
}
// WSBroadcaster provides WebSocket broadcasting capabilities.
type WSBroadcaster interface {
BroadcastDiscoveryProgress(progress *DiscoveryProgress)
}
// Service manages infrastructure discovery.
type Service struct {
store *Store
scanner *DeepScanner
stateProvider StateProvider
aiAnalyzer AIAnalyzer
wsHub WSBroadcaster // WebSocket hub for broadcasting progress
mu sync.RWMutex
running bool
stopCh chan struct{}
intervalCh chan time.Duration // Channel for live interval updates
interval time.Duration
initialDelay time.Duration
lastRun time.Time
deepScanTimeout time.Duration // Timeout for individual deep scans
maxDiscoveryAge time.Duration // Max age before rediscovery (default 30 days)
// Cache for AI analysis results (by image name)
analysisCache map[string]*analysisCacheEntry
cacheMu sync.RWMutex
cacheExpiry time.Duration
// In-progress discovery tracking (prevents duplicate concurrent discoveries)
inProgressMu sync.Mutex
inProgress map[string]*discoveryInProgress
}
// discoveryInProgress tracks an ongoing discovery operation.
// Multiple callers can wait on the done channel for completion.
type discoveryInProgress struct {
done chan struct{} // Closed when discovery completes
result *ResourceDiscovery // Result after completion
err error // Error after completion
}
// analysisCacheEntry holds a cached AI analysis result with its timestamp.
type analysisCacheEntry struct {
result *AIAnalysisResponse
cachedAt time.Time
}
// Config holds discovery service configuration.
type Config struct {
DataDir string
Interval time.Duration // How often to run fingerprint collection (default 5 min)
CacheExpiry time.Duration // How long to cache AI analysis results
DeepScanTimeout time.Duration // Timeout for individual deep scans (default 60s)
// Fingerprint-based discovery settings
MaxDiscoveryAge time.Duration // Rediscover after this duration (default 30 days)
FingerprintInterval time.Duration // How often to collect fingerprints (default 5 min)
}
// DefaultConfig returns the default discovery configuration.
func DefaultConfig() Config {
return Config{
Interval: 5 * time.Minute, // Fingerprint collection interval
CacheExpiry: 1 * time.Hour,
DeepScanTimeout: 60 * time.Second,
MaxDiscoveryAge: 30 * 24 * time.Hour, // 30 days
FingerprintInterval: 5 * time.Minute,
}
}
// NewService creates a new discovery service.
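// Typical wiring (illustrative; the store, scanner, and state provider are
// constructed elsewhere in the application):
//
//	svc := NewService(store, scanner, stateProvider, DefaultConfig())
//	svc.SetAIAnalyzer(analyzer) // optional; enables AI-backed discovery
//	svc.Start(ctx)
//	defer svc.Stop()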
func NewService(store *Store, scanner *DeepScanner, stateProvider StateProvider, cfg Config) *Service {
if cfg.Interval == 0 {
cfg.Interval = 5 * time.Minute
}
if cfg.CacheExpiry == 0 {
cfg.CacheExpiry = 1 * time.Hour
}
if cfg.DeepScanTimeout == 0 {
cfg.DeepScanTimeout = 60 * time.Second
}
if cfg.MaxDiscoveryAge == 0 {
cfg.MaxDiscoveryAge = 30 * 24 * time.Hour // 30 days
}
return &Service{
store: store,
scanner: scanner,
stateProvider: stateProvider,
interval: cfg.Interval,
initialDelay: 30 * time.Second,
cacheExpiry: cfg.CacheExpiry,
deepScanTimeout: cfg.DeepScanTimeout,
maxDiscoveryAge: cfg.MaxDiscoveryAge,
stopCh: make(chan struct{}),
intervalCh: make(chan time.Duration, 1), // Buffered to prevent blocking
analysisCache: make(map[string]*analysisCacheEntry),
inProgress: make(map[string]*discoveryInProgress),
}
}
// SetAIAnalyzer sets the AI analyzer for discovery.
func (s *Service) SetAIAnalyzer(analyzer AIAnalyzer) {
s.mu.Lock()
defer s.mu.Unlock()
s.aiAnalyzer = analyzer
}
// Start begins the background discovery service.
func (s *Service) Start(ctx context.Context) {
s.mu.Lock()
if s.running {
s.mu.Unlock()
return
}
s.running = true
s.stopCh = make(chan struct{})
s.mu.Unlock()
log.Info().
Dur("interval", s.interval).
Msg("Starting infrastructure discovery service")
go s.discoveryLoop(ctx)
}
// Stop stops the background discovery service.
func (s *Service) Stop() {
s.mu.Lock()
defer s.mu.Unlock()
if s.running {
close(s.stopCh)
s.running = false
}
}
// SetInterval updates the scan interval. Takes effect immediately if running.
func (s *Service) SetInterval(interval time.Duration) {
s.mu.Lock()
s.interval = interval
running := s.running
s.mu.Unlock()
// If running, send the new interval to the loop (non-blocking)
if running {
select {
case s.intervalCh <- interval:
log.Info().Dur("interval", interval).Msg("Discovery interval updated (live)")
default:
// Channel full, interval will be picked up eventually
log.Debug().Dur("interval", interval).Msg("Discovery interval updated (pending)")
}
}
}
// needsDeepScan determines if a discovery result needs a deep scan based on quality.
// Returns true if the discovery is incomplete or low-confidence.
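// For example, a discovery with no RawCommandOutput and Confidence below 0.7
// triggers a deep scan, while one that already carries command outputs does not.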
func (s *Service) needsDeepScan(discovery *ResourceDiscovery) bool {
if discovery == nil {
return true // No discovery at all
}
// Already has deep scan data (raw command outputs)
if len(discovery.RawCommandOutput) > 0 {
return false
}
// Low confidence - needs more investigation
if discovery.Confidence < 0.7 {
return true
}
// Unknown service type
if discovery.ServiceType == "" || discovery.ServiceType == "unknown" {
return true
}
// Missing key paths that deep scan could discover
if len(discovery.Facts) == 0 && len(discovery.ConfigPaths) == 0 && len(discovery.LogPaths) == 0 {
return true
}
return false
}
// SetWSHub sets the WebSocket hub for broadcasting progress updates.
func (s *Service) SetWSHub(hub WSBroadcaster) {
s.mu.Lock()
s.wsHub = hub
s.mu.Unlock()
// Wire up the scanner's progress callback to broadcast via WebSocket
if s.scanner != nil {
s.scanner.SetProgressCallback(s.broadcastProgress)
}
log.Info().Msg("WebSocket hub connected to discovery service")
}
// broadcastProgress broadcasts discovery progress to all WebSocket clients.
func (s *Service) broadcastProgress(progress *DiscoveryProgress) {
s.mu.RLock()
hub := s.wsHub
s.mu.RUnlock()
if hub == nil || progress == nil {
return
}
hub.BroadcastDiscoveryProgress(progress)
}
// IsRunning returns whether the background discovery loop is active.
func (s *Service) IsRunning() bool {
s.mu.RLock()
defer s.mu.RUnlock()
return s.running
}
// discoveryLoop runs periodic fingerprint collection and automatic refreshes.
// Fingerprints detect changes cheaply; changed/stale/new resources are then refreshed.
func (s *Service) discoveryLoop(ctx context.Context) {
delay := s.initialDelay
if delay <= 0 {
delay = 30 * time.Second
}
// Run initial fingerprint collection after a short delay
select {
case <-time.After(delay):
case <-s.stopCh:
return
case <-ctx.Done():
return
}
s.collectFingerprints(ctx)
s.runAutomaticDiscoveryRefresh(ctx)
s.mu.RLock()
currentInterval := s.interval
s.mu.RUnlock()
ticker := time.NewTicker(currentInterval)
// Stop via a closure so the most recently created ticker is stopped on exit,
// even after the ticker has been replaced on an interval change.
defer func() { ticker.Stop() }()
for {
select {
case <-ticker.C:
s.collectFingerprints(ctx)
s.runAutomaticDiscoveryRefresh(ctx)
case newInterval := <-s.intervalCh:
// Interval changed - reset the ticker
ticker.Stop()
ticker = time.NewTicker(newInterval)
log.Info().Dur("interval", newInterval).Msg("Fingerprint collection interval reset")
case <-s.stopCh:
log.Info().Msg("Stopping discovery service")
return
case <-ctx.Done():
log.Info().Msg("Discovery context cancelled")
return
}
}
}
func (s *Service) runAutomaticDiscoveryRefresh(ctx context.Context) {
if ctx == nil || ctx.Err() != nil || s.store == nil {
return
}
s.mu.RLock()
analyzerConfigured := s.aiAnalyzer != nil
maxDiscoveryAge := s.maxDiscoveryAge
s.mu.RUnlock()
if !analyzerConfigured {
log.Debug().Msg("Skipping automatic discovery refresh - AI analyzer not configured")
return
}
changedResources, err := s.store.GetChangedResources()
if err != nil {
log.Warn().Err(err).Msg("Failed to fetch changed resources for automatic discovery refresh")
return
}
staleResources, err := s.store.GetStaleResources(maxDiscoveryAge)
if err != nil {
log.Warn().Err(err).Msg("Failed to fetch stale resources for automatic discovery refresh")
return
}
candidates := make(map[string]struct{}, len(changedResources)+len(staleResources))
for _, id := range changedResources {
if strings.TrimSpace(id) != "" {
candidates[id] = struct{}{}
}
}
for _, id := range staleResources {
if strings.TrimSpace(id) != "" {
candidates[id] = struct{}{}
}
}
if len(candidates) == 0 {
return
}
resourceIDs := make([]string, 0, len(candidates))
for id := range candidates {
resourceIDs = append(resourceIDs, id)
}
sort.Strings(resourceIDs)
log.Info().
Int("changed", len(changedResources)).
Int("stale", len(staleResources)).
Int("total", len(resourceIDs)).
Msg("Running automatic discovery refresh for changed/stale resources")
discoveredCount := 0
failedCount := 0
for _, id := range resourceIDs {
if ctx.Err() != nil {
break
}
resourceType, hostID, resourceID, err := ParseResourceID(id)
if err != nil {
failedCount++
log.Warn().
Err(err).
Str("resource_id", id).
Msg("Skipping invalid resource ID during automatic discovery refresh")
continue
}
_, err = s.DiscoverResource(ctx, DiscoveryRequest{
ResourceType: resourceType,
ResourceID: resourceID,
HostID: hostID,
Hostname: hostID,
})
if err != nil {
failedCount++
log.Warn().
Err(err).
Str("resource_id", id).
Str("resource_type", string(resourceType)).
Msg("Automatic discovery refresh failed for resource")
continue
}
discoveredCount++
}
log.Info().
Int("discovered", discoveredCount).
Int("failed", failedCount).
Msg("Automatic discovery refresh completed")
}
// collectFingerprints collects fingerprints from all resources (Docker, LXC, VM).
// This is metadata-only and does not invoke the AI analyzer.
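// Fingerprint keys are namespaced by resource type, for example:
//
//	docker:<agentID>:<resourceID>
//	lxc:<node>:<resourceID>
//	vm:<node>:<resourceID>
//	k8s:<clusterID>:<namespace>/<podName>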
func (s *Service) collectFingerprints(ctx context.Context) {
defer func() {
if r := recover(); r != nil {
log.Error().Interface("panic", r).Stack().Msg("Recovered from panic in fingerprint collection")
}
}()
s.mu.Lock()
s.lastRun = time.Now()
s.mu.Unlock()
if s.stateProvider == nil {
return
}
state := s.stateProvider.GetState()
changedCount := 0
newCount := 0
// Process Docker containers
for _, host := range state.DockerHosts {
for _, container := range host.Containers {
select {
case <-ctx.Done():
return
default:
}
// Generate new fingerprint (prefixed with docker: to avoid collisions)
newFP := GenerateDockerFingerprint(host.AgentID, &container)
fpKey := "docker:" + host.AgentID + ":" + newFP.ResourceID
// Get previous fingerprint
oldFP, _ := s.store.GetFingerprint(fpKey)
// Update the fingerprint's ResourceID to include prefix for storage
newFP.ResourceID = fpKey
// Save new fingerprint
if err := s.store.SaveFingerprint(newFP); err != nil {
log.Warn().Err(err).Str("container", container.Name).Msg("Failed to save Docker fingerprint")
continue
}
// Check if this is new or changed
if oldFP == nil {
newCount++
log.Debug().
Str("type", "docker").
Str("container", container.Name).
Str("hash", newFP.Hash).
Msg("New fingerprint captured")
} else if newFP.HasSchemaChanged(oldFP) {
// Schema changed - don't count as "changed" to avoid mass rediscovery
log.Debug().
Str("type", "docker").
Str("container", container.Name).
Int("old_schema", oldFP.SchemaVersion).
Int("new_schema", newFP.SchemaVersion).
Msg("Fingerprint schema updated")
} else if oldFP.Hash != newFP.Hash {
changedCount++
log.Info().
Str("type", "docker").
Str("container", container.Name).
Str("old_hash", oldFP.Hash).
Str("new_hash", newFP.Hash).
Msg("Fingerprint changed - discovery will run on next request")
}
}
}
// Process LXC containers
for _, lxc := range state.Containers {
select {
case <-ctx.Done():
return
default:
}
// Generate new fingerprint
newFP := GenerateLXCFingerprint(lxc.Node, &lxc)
fpKey := "lxc:" + lxc.Node + ":" + newFP.ResourceID
// Get previous fingerprint
oldFP, _ := s.store.GetFingerprint(fpKey)
// Update the fingerprint's ResourceID to include prefix for storage
newFP.ResourceID = fpKey
// Save new fingerprint
if err := s.store.SaveFingerprint(newFP); err != nil {
log.Warn().Err(err).Str("lxc", lxc.Name).Msg("Failed to save LXC fingerprint")
continue
}
// Check if this is new or changed
if oldFP == nil {
newCount++
log.Debug().
Str("type", "lxc").
Str("name", lxc.Name).
Int("vmid", lxc.VMID).
Str("hash", newFP.Hash).
Msg("New fingerprint captured")
} else if newFP.HasSchemaChanged(oldFP) {
log.Debug().
Str("type", "lxc").
Str("name", lxc.Name).
Int("vmid", lxc.VMID).
Int("old_schema", oldFP.SchemaVersion).
Int("new_schema", newFP.SchemaVersion).
Msg("Fingerprint schema updated")
} else if oldFP.Hash != newFP.Hash {
changedCount++
log.Info().
Str("type", "lxc").
Str("name", lxc.Name).
Int("vmid", lxc.VMID).
Str("old_hash", oldFP.Hash).
Str("new_hash", newFP.Hash).
Msg("Fingerprint changed - discovery will run on next request")
}
}
// Process VMs
for _, vm := range state.VMs {
select {
case <-ctx.Done():
return
default:
}
// Generate new fingerprint
newFP := GenerateVMFingerprint(vm.Node, &vm)
fpKey := "vm:" + vm.Node + ":" + newFP.ResourceID
// Get previous fingerprint
oldFP, _ := s.store.GetFingerprint(fpKey)
// Update the fingerprint's ResourceID to include prefix for storage
newFP.ResourceID = fpKey
// Save new fingerprint
if err := s.store.SaveFingerprint(newFP); err != nil {
log.Warn().Err(err).Str("vm", vm.Name).Msg("Failed to save VM fingerprint")
continue
}
// Check if this is new or changed
if oldFP == nil {
newCount++
log.Debug().
Str("type", "vm").
Str("name", vm.Name).
Int("vmid", vm.VMID).
Str("hash", newFP.Hash).
Msg("New fingerprint captured")
} else if newFP.HasSchemaChanged(oldFP) {
log.Debug().
Str("type", "vm").
Str("name", vm.Name).
Int("vmid", vm.VMID).
Int("old_schema", oldFP.SchemaVersion).
Int("new_schema", newFP.SchemaVersion).
Msg("Fingerprint schema updated")
} else if oldFP.Hash != newFP.Hash {
changedCount++
log.Info().
Str("type", "vm").
Str("name", vm.Name).
Int("vmid", vm.VMID).
Str("old_hash", oldFP.Hash).
Str("new_hash", newFP.Hash).
Msg("Fingerprint changed - discovery will run on next request")
}
}
// Process Kubernetes pods
for _, cluster := range state.KubernetesClusters {
for _, pod := range cluster.Pods {
select {
case <-ctx.Done():
return
default:
}
// Generate new fingerprint
newFP := GenerateK8sPodFingerprint(cluster.ID, &pod)
fpKey := "k8s:" + cluster.ID + ":" + pod.Namespace + "/" + pod.Name
// Get previous fingerprint
oldFP, _ := s.store.GetFingerprint(fpKey)
// Update the fingerprint's ResourceID to include prefix for storage
newFP.ResourceID = fpKey
// Save new fingerprint
if err := s.store.SaveFingerprint(newFP); err != nil {
log.Warn().Err(err).Str("pod", pod.Name).Str("namespace", pod.Namespace).Msg("Failed to save K8s pod fingerprint")
continue
}
// Check if this is new or changed
if oldFP == nil {
newCount++
log.Debug().
Str("type", "k8s").
Str("name", pod.Name).
Str("namespace", pod.Namespace).
Str("cluster", cluster.Name).
Str("hash", newFP.Hash).
Msg("New fingerprint captured")
} else if newFP.HasSchemaChanged(oldFP) {
log.Debug().
Str("type", "k8s").
Str("name", pod.Name).
Str("namespace", pod.Namespace).
Str("cluster", cluster.Name).
Int("old_schema", oldFP.SchemaVersion).
Int("new_schema", newFP.SchemaVersion).
Msg("Fingerprint schema updated")
} else if oldFP.Hash != newFP.Hash {
changedCount++
log.Info().
Str("type", "k8s").
Str("name", pod.Name).
Str("namespace", pod.Namespace).
Str("cluster", cluster.Name).
Str("old_hash", oldFP.Hash).
Str("new_hash", newFP.Hash).
Msg("Fingerprint changed - discovery will run on next request")
}
}
}
// Update last scan time
s.store.SetLastFingerprintScan(time.Now())
if newCount > 0 || changedCount > 0 {
log.Info().
Int("new", newCount).
Int("changed", changedCount).
Int("total", s.store.GetFingerprintCount()).
Msg("Fingerprint collection complete")
} else {
log.Debug().
Int("total", s.store.GetFingerprintCount()).
Msg("Fingerprint collection complete - no changes")
}
// Cleanup orphaned data (fingerprints/discoveries for removed resources)
s.cleanupOrphanedData(state)
}
// cleanupOrphanedData removes fingerprints and discoveries for resources that no longer exist.
func (s *Service) cleanupOrphanedData(state StateSnapshot) {
// Safety check: Don't cleanup if state appears empty
// This prevents catastrophic deletion if state provider has an error
totalResources := len(state.Containers) + len(state.VMs) + len(state.KubernetesClusters)
for _, host := range state.DockerHosts {
totalResources += len(host.Containers)
}
if totalResources == 0 {
log.Debug().Msg("Skipping orphaned data cleanup - state is empty (may be an error)")
return
}
// Build set of current resource IDs
currentIDs := make(map[string]bool)
// Docker containers
for _, host := range state.DockerHosts {
for _, container := range host.Containers {
fpKey := "docker:" + host.AgentID + ":" + container.Name
currentIDs[fpKey] = true
}
}
// LXC containers
for _, lxc := range state.Containers {
fpKey := "lxc:" + lxc.Node + ":" + strconv.Itoa(lxc.VMID)
currentIDs[fpKey] = true
}
// VMs
for _, vm := range state.VMs {
fpKey := "vm:" + vm.Node + ":" + strconv.Itoa(vm.VMID)
currentIDs[fpKey] = true
}
// Kubernetes pods
for _, cluster := range state.KubernetesClusters {
for _, pod := range cluster.Pods {
fpKey := "k8s:" + cluster.ID + ":" + pod.Namespace + "/" + pod.Name
currentIDs[fpKey] = true
}
}
// Run cleanup
fpRemoved := s.store.CleanupOrphanedFingerprints(currentIDs)
discRemoved := s.store.CleanupOrphanedDiscoveries(currentIDs)
if fpRemoved > 0 || discRemoved > 0 {
log.Info().
Int("fingerprints_removed", fpRemoved).
Int("discoveries_removed", discRemoved).
Msg("Cleaned up orphaned data")
}
}
// discoverDockerContainers runs discovery on Docker containers using metadata.
// Automatically runs deep scans when the shallow scan results are incomplete or low-confidence.
func (s *Service) discoverDockerContainers(ctx context.Context, hosts []DockerHost) {
s.mu.RLock()
analyzer := s.aiAnalyzer
s.mu.RUnlock()
if analyzer == nil {
log.Debug().Msg("AI analyzer not set, skipping Docker discovery")
return
}
for _, host := range hosts {
for _, container := range host.Containers {
select {
case <-ctx.Done():
return
default:
}
// Build resource ID
id := MakeResourceID(ResourceTypeDocker, host.AgentID, container.Name)
// Check if we already have a recent discovery
if !s.store.NeedsRefresh(id, s.cacheExpiry) {
continue
}
// Check existing discovery to see if it needs a deep scan
existing, _ := s.store.Get(id)
// Analyze using metadata (shallow discovery)
discovery := s.analyzeDockerContainer(ctx, analyzer, container, host)
if discovery != nil {
// Smart auto deep scan: enhance if discovery is incomplete or low-confidence
// Also deep scan if there's no existing discovery (first time)
if s.scanner != nil && (existing == nil || s.needsDeepScan(discovery)) {
log.Info().
Str("id", id).
Float64("confidence", discovery.Confidence).
Str("serviceType", discovery.ServiceType).
Bool("firstDiscovery", existing == nil).
Msg("Auto deep scan triggered due to incomplete discovery")
discovery = s.enhanceWithDeepScan(ctx, discovery, host)
}
// Suggest web interface URL using Docker host hostname
discovery.SuggestedURL = SuggestWebURL(discovery, host.Hostname)
if err := s.store.Save(discovery); err != nil {
log.Warn().Err(err).Str("id", id).Msg("Failed to save discovery")
}
}
}
}
}
// enhanceWithDeepScan runs a deep scan and merges the results into the discovery.
func (s *Service) enhanceWithDeepScan(ctx context.Context, discovery *ResourceDiscovery, host DockerHost) *ResourceDiscovery {
s.mu.RLock()
timeout := s.deepScanTimeout
analyzer := s.aiAnalyzer
s.mu.RUnlock()
if s.scanner == nil || analyzer == nil {
return discovery
}
// Create a timeout context for the deep scan
scanCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
req := DiscoveryRequest{
ResourceType: discovery.ResourceType,
ResourceID: discovery.ResourceID,
HostID: discovery.HostID,
Hostname: discovery.Hostname,
}
scanResult, err := s.scanner.Scan(scanCtx, req)
if err != nil {
log.Debug().Err(err).Str("id", discovery.ID).Msg("Deep scan failed during background discovery")
return discovery
}
if len(scanResult.CommandOutputs) == 0 {
return discovery
}
// Build analysis request with command outputs
analysisReq := AIAnalysisRequest{
ResourceType: discovery.ResourceType,
ResourceID: discovery.ResourceID,
HostID: discovery.HostID,
Hostname: discovery.Hostname,
CommandOutputs: scanResult.CommandOutputs,
}
// Add metadata if available
if s.stateProvider != nil {
analysisReq.Metadata = s.getResourceMetadata(req)
}
// Build prompt and analyze
prompt := s.buildDeepAnalysisPrompt(analysisReq)
response, err := analyzer.AnalyzeForDiscovery(scanCtx, prompt)
if err != nil {
log.Debug().Err(err).Str("id", discovery.ID).Msg("Deep analysis failed during background discovery")
return discovery
}
result := s.parseAIResponse(response)
if result == nil {
return discovery
}
// Merge results - deep scan results take precedence for non-empty fields
if result.ServiceType != "" && result.ServiceType != "unknown" {
discovery.ServiceType = result.ServiceType
}
if result.ServiceName != "" {
discovery.ServiceName = result.ServiceName
}
if result.ServiceVersion != "" {
discovery.ServiceVersion = result.ServiceVersion
}
if result.Category != "" && result.Category != CategoryUnknown {
discovery.Category = result.Category
}
if result.CLIAccess != "" {
discovery.CLIAccess = s.formatCLIAccess(discovery.ResourceType, discovery.ResourceID, result.CLIAccess)
}
if len(result.Facts) > 0 {
discovery.Facts = result.Facts
}
if len(result.ConfigPaths) > 0 {
discovery.ConfigPaths = result.ConfigPaths
}
if len(result.DataPaths) > 0 {
discovery.DataPaths = result.DataPaths
}
if len(result.LogPaths) > 0 {
discovery.LogPaths = result.LogPaths
}
if len(result.Ports) > 0 {
discovery.Ports = result.Ports
}
if result.Confidence > discovery.Confidence {
discovery.Confidence = result.Confidence
}
if result.Reasoning != "" {
discovery.AIReasoning = result.Reasoning
}
// Store raw command outputs
discovery.RawCommandOutput = scanResult.CommandOutputs
discovery.ScanDuration = scanResult.CompletedAt.Sub(scanResult.StartedAt).Milliseconds()
discovery.UpdatedAt = time.Now()
// Parse docker_mounts if present (for LXCs/VMs running Docker)
if dockerMountsOutput, ok := scanResult.CommandOutputs["docker_mounts"]; ok {
discovery.DockerMounts = parseDockerMounts(dockerMountsOutput)
if len(discovery.DockerMounts) > 0 {
log.Debug().
Str("id", discovery.ID).
Int("mountCount", len(discovery.DockerMounts)).
Msg("Parsed Docker bind mounts from discovery")
}
}
log.Info().
Str("id", discovery.ID).
Int("commandOutputs", len(scanResult.CommandOutputs)).
Int("dockerMounts", len(discovery.DockerMounts)).
Dur("scanDuration", scanResult.CompletedAt.Sub(scanResult.StartedAt)).
Msg("Enhanced discovery with deep scan")
return discovery
}
// analyzeDockerContainer analyzes a Docker container using AI.
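// Results are cached per image name (see analysisCache), so multiple containers
// running the same image share a single AI analysis within the cache expiry window.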
func (s *Service) analyzeDockerContainer(ctx context.Context, analyzer AIAnalyzer, c DockerContainer, host DockerHost) *ResourceDiscovery {
// Check cache first (per-image timestamp)
s.cacheMu.RLock()
entry, found := s.analysisCache[c.Image]
cacheValid := found && time.Since(entry.cachedAt) < s.cacheExpiry
s.cacheMu.RUnlock()
var result *AIAnalysisResponse
if cacheValid {
result = entry.result
} else {
// Build prompt for AI analysis
prompt := s.buildMetadataAnalysisPrompt(c, host)
response, err := analyzer.AnalyzeForDiscovery(ctx, prompt)
if err != nil {
log.Warn().Err(err).Str("container", c.Name).Msg("AI analysis failed")
return nil
}
result = s.parseAIResponse(response)
if result == nil {
log.Warn().Str("container", c.Name).Msg("Failed to parse AI response")
return nil
}
// Cache the result with its own timestamp
s.cacheMu.Lock()
s.analysisCache[c.Image] = &analysisCacheEntry{
result: result,
cachedAt: time.Now(),
}
s.cacheMu.Unlock()
}
// Skip unknown/low-confidence results
if result.ServiceType == "unknown" || result.Confidence < 0.5 {
return nil
}
// Build CLI access string
cliAccess := result.CLIAccess
if cliAccess != "" {
cliAccess = strings.ReplaceAll(cliAccess, "{container}", c.Name)
}
// Extract ports
var ports []PortInfo
for _, p := range c.Ports {
ports = append(ports, PortInfo{
Port: p.PrivatePort,
Protocol: p.Protocol,
Address: fmt.Sprintf(":%d", p.PublicPort),
})
}
return &ResourceDiscovery{
ID: MakeResourceID(ResourceTypeDocker, host.AgentID, c.Name),
ResourceType: ResourceTypeDocker,
ResourceID: c.Name,
HostID: host.AgentID,
Hostname: host.Hostname,
ServiceType: result.ServiceType,
ServiceName: result.ServiceName,
ServiceVersion: result.ServiceVersion,
Category: result.Category,
CLIAccess: cliAccess,
Facts: result.Facts,
ConfigPaths: result.ConfigPaths,
DataPaths: result.DataPaths,
LogPaths: result.LogPaths,
Ports: ports,
Confidence: result.Confidence,
AIReasoning: result.Reasoning,
DiscoveredAt: time.Now(),
UpdatedAt: time.Now(),
}
}
// DiscoverResource performs deep discovery on a specific resource.
// Uses fingerprint-based detection to avoid unnecessary AI calls:
// - Returns cached discovery if fingerprint hasn't changed
// - Runs discovery only when fingerprint changed or discovery is too old
// - Prevents duplicate concurrent discoveries for the same resource
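// A minimal on-demand call looks like (hypothetical identifiers):
//
//	d, err := svc.DiscoverResource(ctx, DiscoveryRequest{
//		ResourceType: ResourceTypeLXC,
//		ResourceID:   "105",
//		HostID:       "pve1",
//	})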
func (s *Service) DiscoverResource(ctx context.Context, req DiscoveryRequest) (*ResourceDiscovery, error) {
// Redirect PVE node requests to linked host agent if available
// This ensures we always scan and store data under the canonical Host Agent ID
if req.ResourceType == ResourceTypeHost && s.stateProvider != nil {
state := s.stateProvider.GetState()
for _, node := range state.Nodes {
// Check if the requested ID matches the Node Name or ID
if node.Name == req.HostID || node.Name == req.ResourceID || node.ID == req.ResourceID {
if node.LinkedHostAgentID != "" {
log.Info().
Str("from_host", req.HostID).
Str("to_agent", node.LinkedHostAgentID).
Msg("Redirecting discovery scan to linked host agent")
req.HostID = node.LinkedHostAgentID
req.ResourceID = node.LinkedHostAgentID
}
break
}
}
}
resourceID := MakeResourceID(req.ResourceType, req.HostID, req.ResourceID)
// Get current fingerprint (if available)
// Fingerprint key matches the resource ID format: type:host:id
currentFP, _ := s.store.GetFingerprint(resourceID)
// Get existing discovery
existing, _ := s.store.Get(resourceID)
// Determine if we need to run discovery
needsDiscovery := false
reason := ""
if req.Force {
needsDiscovery = true
reason = "forced"
} else if existing == nil {
needsDiscovery = true
reason = "no existing discovery"
} else if currentFP != nil && existing.Fingerprint != currentFP.Hash {
// Fingerprint hash differs - check if it's just a schema version change
if existing.FingerprintSchemaVersion != 0 && existing.FingerprintSchemaVersion != currentFP.SchemaVersion {
// Schema changed but container didn't - don't trigger rediscovery
// This prevents mass rediscovery when we upgrade the fingerprint algorithm
log.Debug().
Str("id", resourceID).
Int("old_schema", existing.FingerprintSchemaVersion).
Int("new_schema", currentFP.SchemaVersion).
Msg("Fingerprint schema changed, but not triggering rediscovery")
} else {
// Same schema version, different hash = real container change
needsDiscovery = true
reason = "fingerprint changed"
}
} else if time.Since(existing.DiscoveredAt) > s.maxDiscoveryAge {
needsDiscovery = true
reason = "discovery too old"
}
// Return cached discovery if still valid
if !needsDiscovery && existing != nil {
log.Debug().Str("id", resourceID).Msg("Discovery still valid, returning cached")
return existing, nil
}
// Check for duplicate concurrent discovery requests
s.inProgressMu.Lock()
if inProg, ok := s.inProgress[resourceID]; ok {
// Discovery already in progress - wait for it
s.inProgressMu.Unlock()
log.Debug().Str("id", resourceID).Msg("Discovery already in progress, waiting for result")
select {
case <-inProg.done:
return inProg.result, inProg.err
case <-ctx.Done():
return nil, ctx.Err()
}
}
// Claim this discovery slot
inProg := &discoveryInProgress{
done: make(chan struct{}),
}
s.inProgress[resourceID] = inProg
s.inProgressMu.Unlock()
// Ensure we clean up and notify waiters when done
defer func() {
close(inProg.done)
s.inProgressMu.Lock()
delete(s.inProgress, resourceID)
s.inProgressMu.Unlock()
}()
log.Info().Str("id", resourceID).Str("reason", reason).Msg("Running discovery")
s.mu.RLock()
analyzer := s.aiAnalyzer
s.mu.RUnlock()
if analyzer == nil {
inProg.err = fmt.Errorf("AI analyzer not configured")
return nil, inProg.err
}
// Run deep scan if scanner is available
var scanResult *ScanResult
var scanError error
if s.scanner != nil {
scanResult, scanError = s.scanner.Scan(ctx, req)
if scanError != nil {
log.Warn().
Err(scanError).
Str("id", resourceID).
Str("resource_type", string(req.ResourceType)).
Msg("Deep scan failed, falling back to metadata-only analysis. For full discovery, ensure the host agent is connected with commands enabled.")
}
}
// Build analysis request
analysisReq := AIAnalysisRequest{
ResourceType: req.ResourceType,
ResourceID: req.ResourceID,
HostID: req.HostID,
Hostname: req.Hostname,
}
if scanResult != nil {
analysisReq.CommandOutputs = scanResult.CommandOutputs
}
// Add metadata if available
if s.stateProvider != nil {
analysisReq.Metadata = s.getResourceMetadata(req)
}
// Build prompt and analyze
prompt := s.buildDeepAnalysisPrompt(analysisReq)
// Broadcast progress: AI analysis starting
s.broadcastProgress(&DiscoveryProgress{
ResourceID: resourceID,
Status: DiscoveryStatusRunning,
CurrentStep: "Analyzing with Pulse Assistant...",
})
response, err := analyzer.AnalyzeForDiscovery(ctx, prompt)
if err != nil {
inProg.err = fmt.Errorf("AI analysis failed: %w", err)
return nil, inProg.err
}
result := s.parseAIResponse(response)
if result == nil {
// Truncate response for error message
truncated := response
if len(truncated) > 500 {
truncated = truncated[:500] + "..."
}
inProg.err = fmt.Errorf("failed to parse AI response: %s", truncated)
return nil, inProg.err
}
// Resolve hostname from metadata if not provided in request
hostname := req.Hostname
if hostname == "" && analysisReq.Metadata != nil {
if name, ok := analysisReq.Metadata["name"].(string); ok && name != "" {
hostname = name
}
}
// Build discovery result
discovery := &ResourceDiscovery{
ID: resourceID,
ResourceType: req.ResourceType,
ResourceID: req.ResourceID,
HostID: req.HostID,
Hostname: hostname,
ServiceType: result.ServiceType,
ServiceName: result.ServiceName,
ServiceVersion: result.ServiceVersion,
Category: result.Category,
CLIAccess: s.formatCLIAccess(req.ResourceType, req.ResourceID, result.CLIAccess),
CLIAccessVersion: CLIAccessVersion,
Facts: result.Facts,
ConfigPaths: result.ConfigPaths,
DataPaths: result.DataPaths,
LogPaths: result.LogPaths,
Ports: result.Ports,
Confidence: result.Confidence,
AIReasoning: result.Reasoning,
DiscoveredAt: time.Now(),
UpdatedAt: time.Now(),
}
// Store fingerprint with discovery
if currentFP != nil {
discovery.Fingerprint = currentFP.Hash
discovery.FingerprintedAt = currentFP.GeneratedAt
discovery.FingerprintSchemaVersion = currentFP.SchemaVersion
}
if scanResult != nil {
discovery.RawCommandOutput = scanResult.CommandOutputs
discovery.ScanDuration = scanResult.CompletedAt.Sub(scanResult.StartedAt).Milliseconds()
// Parse docker_mounts if present (for LXCs/VMs running Docker)
if dockerMountsOutput, ok := scanResult.CommandOutputs["docker_mounts"]; ok {
discovery.DockerMounts = parseDockerMounts(dockerMountsOutput)
if len(discovery.DockerMounts) > 0 {
log.Debug().
Str("id", discovery.ID).
Int("mountCount", len(discovery.DockerMounts)).
Msg("Parsed Docker bind mounts from on-demand discovery")
}
}
} else if scanError != nil {
// Add note to reasoning when we couldn't run commands
metadataNote := "[Note: Discovery was limited to metadata-only analysis because command execution was unavailable. "
if strings.Contains(scanError.Error(), "no connected agent") {
metadataNote += "To enable full discovery with command execution, ensure the host agent has 'Pulse Commands' enabled in Settings → Unified Agents.]"
} else {
metadataNote += "Error: " + scanError.Error() + "]"
}
if discovery.AIReasoning != "" {
discovery.AIReasoning = metadataNote + " " + discovery.AIReasoning
} else {
discovery.AIReasoning = metadataNote
}
}
// Preserve user notes from existing discovery
if existing != nil {
discovery.UserNotes = existing.UserNotes
discovery.UserSecrets = existing.UserSecrets
if discovery.DiscoveredAt.IsZero() || existing.DiscoveredAt.Before(discovery.DiscoveredAt) {
discovery.DiscoveredAt = existing.DiscoveredAt
}
}
// Suggest web interface URL based on service type and external IP
if s.stateProvider != nil {
if externalIP := s.getResourceExternalIP(req); externalIP != "" {
discovery.SuggestedURL = SuggestWebURL(discovery, externalIP)
}
}
// Broadcast progress: Discovery complete
s.broadcastProgress(&DiscoveryProgress{
ResourceID: resourceID,
Status: DiscoveryStatusCompleted,
CurrentStep: "Discovery complete",
PercentComplete: 100,
})
// Save discovery
if err := s.store.Save(discovery); err != nil {
inProg.err = fmt.Errorf("failed to save discovery: %w", err)
return nil, inProg.err
}
// Store result for any waiting goroutines
inProg.result = discovery
return discovery, nil
}
// getResourceMetadata retrieves metadata for a resource from the state.
func (s *Service) getResourceMetadata(req DiscoveryRequest) map[string]any {
if s.stateProvider == nil {
return nil
}
state := s.stateProvider.GetState()
metadata := make(map[string]any)
switch req.ResourceType {
case ResourceTypeLXC:
for _, c := range state.Containers {
if fmt.Sprintf("%d", c.VMID) == req.ResourceID && c.Node == req.HostID {
metadata["name"] = c.Name
metadata["status"] = c.Status
metadata["vmid"] = c.VMID
break
}
}
case ResourceTypeVM:
for _, vm := range state.VMs {
if fmt.Sprintf("%d", vm.VMID) == req.ResourceID && vm.Node == req.HostID {
metadata["name"] = vm.Name
metadata["status"] = vm.Status
metadata["vmid"] = vm.VMID
break
}
}
case ResourceTypeDocker:
for _, host := range state.DockerHosts {
if host.AgentID == req.HostID || host.Hostname == req.HostID {
for _, c := range host.Containers {
if c.Name == req.ResourceID {
metadata["image"] = c.Image
metadata["status"] = c.Status
// Filter sensitive labels before sending to AI
metadata["labels"] = filterSensitiveLabels(c.Labels)
break
}
}
break
}
}
case ResourceTypeHost:
for _, host := range state.Hosts {
if host.ID == req.ResourceID || host.Hostname == req.ResourceID || host.ID == req.HostID {
metadata["hostname"] = host.Hostname
metadata["display_name"] = host.DisplayName
metadata["platform"] = host.Platform
metadata["os_name"] = host.OSName
metadata["os_version"] = host.OSVersion
metadata["kernel_version"] = host.KernelVersion
metadata["architecture"] = host.Architecture
metadata["cpu_count"] = host.CPUCount
metadata["status"] = host.Status
if len(host.Tags) > 0 {
metadata["tags"] = host.Tags
}
break
}
}
}
return metadata
}
// getResourceExternalIP retrieves the external IP address for a resource from the state.
// For LXC/VM, this is the first IP from the Proxmox guest agent.
// For Docker containers, this is the Docker host's IP/hostname.
func (s *Service) getResourceExternalIP(req DiscoveryRequest) string {
if s.stateProvider == nil {
return ""
}
state := s.stateProvider.GetState()
switch req.ResourceType {
case ResourceTypeLXC:
for _, c := range state.Containers {
if fmt.Sprintf("%d", c.VMID) == req.ResourceID && c.Node == req.HostID {
if len(c.IPAddresses) > 0 {
return c.IPAddresses[0]
}
return ""
}
}
case ResourceTypeVM:
for _, vm := range state.VMs {
if fmt.Sprintf("%d", vm.VMID) == req.ResourceID && vm.Node == req.HostID {
if len(vm.IPAddresses) > 0 {
return vm.IPAddresses[0]
}
return ""
}
}
case ResourceTypeDocker:
// For Docker containers, use the Docker host's hostname/IP
for _, host := range state.DockerHosts {
if host.AgentID == req.HostID || host.Hostname == req.HostID {
// The Docker host's hostname may be an IP address or a resolvable name; either works for the suggested URL
return host.Hostname
}
}
case ResourceTypeDockerVM, ResourceTypeDockerLXC:
// For Docker containers inside VMs/LXCs, find the VM/LXC's IP
// The hostID contains the parent resource info
for _, vm := range state.VMs {
if fmt.Sprintf("%d", vm.VMID) == req.HostID || vm.Name == req.HostID {
if len(vm.IPAddresses) > 0 {
return vm.IPAddresses[0]
}
}
}
for _, c := range state.Containers {
if fmt.Sprintf("%d", c.VMID) == req.HostID || c.Name == req.HostID {
if len(c.IPAddresses) > 0 {
return c.IPAddresses[0]
}
}
}
}
return ""
}
// formatCLIAccess formats the CLI access string with actual values.
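// For example (illustrative template), "pct exec {vmid} -- {command}" becomes
// "pct exec 105 -- ..." when resourceID is "105".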
func (s *Service) formatCLIAccess(resourceType ResourceType, resourceID, cliTemplate string) string {
if cliTemplate == "" {
// Use default template
cliTemplate = GetCLIAccessTemplate(resourceType)
}
result := cliTemplate
result = strings.ReplaceAll(result, "{vmid}", resourceID)
result = strings.ReplaceAll(result, "{container}", resourceID)
result = strings.ReplaceAll(result, "{command}", "...")
return result
}
// buildMetadataAnalysisPrompt builds a prompt for shallow metadata-based analysis.
func (s *Service) buildMetadataAnalysisPrompt(c DockerContainer, host DockerHost) string {
info := map[string]any{
"name": c.Name,
"image": c.Image,
"status": c.Status,
"host": host.Hostname,
}
if len(c.Ports) > 0 {
var ports []map[string]any
for _, p := range c.Ports {
ports = append(ports, map[string]any{
"public": p.PublicPort,
"private": p.PrivatePort,
"protocol": p.Protocol,
})
}
info["ports"] = ports
}
if len(c.Labels) > 0 {
// Filter sensitive labels before sending to AI
info["labels"] = filterSensitiveLabels(c.Labels)
}
if len(c.Mounts) > 0 {
var mounts []string
for _, m := range c.Mounts {
mounts = append(mounts, m.Destination)
}
info["mounts"] = mounts
}
infoJSON, _ := json.MarshalIndent(info, "", " ")
return fmt.Sprintf(`Analyze this Docker container and identify what service it's running.
Container Information:
%s
Based on the image name, ports, labels, and mounts, determine:
1. What service/application is this?
2. What category does it belong to?
3. How should CLI commands be executed?
Respond in this exact JSON format:
{
"service_type": "lowercase_type",
"service_name": "Human Readable Name",
"service_version": "version if detectable from image tag",
"category": "database|web_server|cache|monitoring|backup|nvr|storage|container|network|security|media|home_automation|unknown",
"cli_access": "docker exec {container} <cli-tool>",
"facts": [],
"config_paths": [],
"data_paths": [],
"log_paths": [],
"ports": [],
"confidence": 0.0-1.0,
"reasoning": "Brief explanation"
}
Respond with ONLY valid JSON.`, string(infoJSON))
}
// buildDeepAnalysisPrompt builds a prompt for deep analysis with command outputs.
func (s *Service) buildDeepAnalysisPrompt(req AIAnalysisRequest) string {
var sections []string
sections = append(sections, fmt.Sprintf(`Resource Type: %s
Resource ID: %s
Host: %s (%s)`, req.ResourceType, req.ResourceID, req.Hostname, req.HostID))
if len(req.Metadata) > 0 {
metaJSON, _ := json.MarshalIndent(req.Metadata, "", " ")
sections = append(sections, fmt.Sprintf("Metadata:\n%s", string(metaJSON)))
}
if len(req.CommandOutputs) > 0 {
sections = append(sections, "Command Outputs:")
for name, output := range req.CommandOutputs {
// Truncate long outputs
if len(output) > 2000 {
output = output[:2000] + "\n... (truncated)"
}
sections = append(sections, fmt.Sprintf("--- %s ---\n%s", name, output))
}
}
// Use different prompts for HOST vs other resource types
if req.ResourceType == ResourceTypeHost {
return fmt.Sprintf(`Analyze this HOST system and provide detailed discovery information.
%s
IMPORTANT: This is a HOST discovery. Focus on identifying the HOST OPERATING SYSTEM and its primary role/purpose, NOT individual services or containers running on it.
Based on all available information, determine:
1. What is the host operating system? (e.g., Unraid, Proxmox, Ubuntu Server, Debian, TrueNAS)
2. What is the OS version?
3. What is the primary role/purpose of this host? (e.g., NAS, hypervisor, media server, backup server)
4. What are the key system paths?
5. What storage is available?
6. What services are running? (list as facts, not as the primary identification)
Respond in this exact JSON format:
{
"service_type": "lowercase_os_type (e.g., unraid, proxmox, ubuntu, debian, truenas)",
"service_name": "Human Readable OS Name and Role (e.g., Unraid NAS Server, Proxmox VE Hypervisor)",
"service_version": "OS version number",
"category": "storage|virtualizer|container|network|unknown",
"cli_access": "ssh user@hostname",
"facts": [
{"category": "version|config|service|port|hardware|network|storage|dependency|security", "key": "fact_name", "value": "fact_value", "source": "command_name", "confidence": 0.9}
],
"config_paths": ["/etc/", "/boot/config/"],
"data_paths": ["/mnt/data", "/storage"],
"log_paths": ["/var/log/"],
"ports": [{"port": 22, "protocol": "tcp", "process": "sshd", "address": "0.0.0.0"}],
"confidence": 0.0-1.0,
"reasoning": "Explanation of host identification"
}
Important:
- The service_type and service_name MUST reflect the HOST OS, not services running on it
- List Docker containers, VMs, or other services as facts with category "service"
- Include storage information (disks, pools, arrays) as facts with category "storage"
- Include hardware info (CPU, RAM) as facts with category "hardware"
Respond with ONLY valid JSON.`, strings.Join(sections, "\n\n"))
}
return fmt.Sprintf(`Analyze this infrastructure resource and provide detailed discovery information.
%s
Based on all available information, determine:
1. What service/application is running?
2. What version is it?
3. What are the important configuration paths?
4. What data paths should be backed up?
5. What log paths are useful for troubleshooting?
6. What ports are in use?
7. Any special hardware (GPU, TPU, etc.)?
8. Any dependencies (databases, message queues, etc.)?
Respond in this exact JSON format:
{
"service_type": "lowercase_type (e.g., frigate, postgres, pbs)",
"service_name": "Human Readable Name",
"service_version": "version number if found",
"category": "database|web_server|cache|monitoring|backup|nvr|storage|container|virtualizer|network|security|media|home_automation|unknown",
"cli_access": "command to access this service's CLI",
"facts": [
{"category": "version|config|service|port|hardware|network|storage|dependency|security", "key": "fact_name", "value": "fact_value", "source": "command_name", "confidence": 0.9}
],
"config_paths": ["/path/to/config.yml"],
"data_paths": ["/path/to/data"],
"log_paths": ["/var/log/service/", "/path/to/app.log"],
"ports": [{"port": 8080, "protocol": "tcp", "process": "nginx", "address": "0.0.0.0"}],
"confidence": 0.0-1.0,
"reasoning": "Explanation of identification"
}
Important:
- Extract version numbers from package lists, process output, or config files
- Identify config and data paths from mount points and file listings
- Identify log paths (e.g., /var/log/, application-specific logs) for troubleshooting
- Note any special hardware like Coral TPU, NVIDIA GPU
- For LXC/VM, the CLI access should use pct exec or qm guest exec
- For Docker, use docker exec
Respond with ONLY valid JSON.`, strings.Join(sections, "\n\n"))
}
// parseAIResponse parses the AI's JSON response.
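// Responses wrapped in markdown code fences or surrounded by extra prose are
// reduced to the outermost {...} JSON object before unmarshalling.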
func (s *Service) parseAIResponse(response string) *AIAnalysisResponse {
log.Debug().Str("raw_response", response).Msg("Discovery raw response")
response = strings.TrimSpace(response)
// Handle markdown code blocks
if strings.HasPrefix(response, "```") {
lines := strings.Split(response, "\n")
var jsonLines []string
inBlock := false
for _, line := range lines {
if strings.HasPrefix(line, "```") {
inBlock = !inBlock
continue
}
if inBlock {
jsonLines = append(jsonLines, line)
}
}
response = strings.Join(jsonLines, "\n")
}
// Find JSON object
start := strings.Index(response, "{")
end := strings.LastIndex(response, "}")
if start >= 0 && end > start {
response = response[start : end+1]
}
var result AIAnalysisResponse
if err := json.Unmarshal([]byte(response), &result); err != nil {
log.Debug().Err(err).Str("response", response).Msg("Failed to parse AI response")
return nil
}
// Set discovered_at for facts
now := time.Now()
for i := range result.Facts {
result.Facts[i].DiscoveredAt = now
}
return &result
}
// parseDockerMounts parses the docker_mounts command output into a slice of DockerBindMount.
// The output format is:
// CONTAINER:container_name
// source|destination|type
// source|destination|type
// CONTAINER:another_container
// source|destination|type
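// For example (hypothetical values), the line "/opt/app/config|/config|bind" under
// "CONTAINER:myapp" yields DockerBindMount{ContainerName: "myapp",
// Source: "/opt/app/config", Destination: "/config", Type: "bind"}.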
func parseDockerMounts(output string) []DockerBindMount {
if output == "" || output == "no_docker_mounts" {
return nil
}
var mounts []DockerBindMount
var currentContainer string
lines := strings.Split(output, "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
if line == "" {
continue
}
// Check if this is a container header
if strings.HasPrefix(line, "CONTAINER:") {
currentContainer = strings.TrimPrefix(line, "CONTAINER:")
continue
}
// Skip if we don't have a current container
if currentContainer == "" {
continue
}
// Parse mount line: source|destination|type
parts := strings.Split(line, "|")
if len(parts) < 2 {
continue
}
mount := DockerBindMount{
ContainerName: currentContainer,
Source: parts[0],
Destination: parts[1],
}
if len(parts) >= 3 {
mount.Type = parts[2]
}
// Only include bind mounts and volumes (skip tmpfs, etc.)
if mount.Type == "" || mount.Type == "bind" || mount.Type == "volume" {
mounts = append(mounts, mount)
}
}
return mounts
}
// GetDiscovery retrieves a discovery by ID.
func (s *Service) GetDiscovery(id string) (*ResourceDiscovery, error) {
d, err := s.store.Get(id)
if err != nil || d == nil {
return d, err
}
s.upgradeCLIAccessIfNeeded(d)
return d, nil
}
func (s *Service) GetDiscoveryByResource(resourceType ResourceType, hostID, resourceID string) (*ResourceDiscovery, error) {
originalHostID := hostID
originalResourceID := resourceID
redirected := false
// Redirect PVE node lookups to linked host agent if available
// This ensures UI components looking up a PVE node by name (e.g. NodeDrawer) get the data associated with the Host Agent
if resourceType == ResourceTypeHost && s.stateProvider != nil {
state := s.stateProvider.GetState()
for _, node := range state.Nodes {
if node.Name == hostID || node.Name == resourceID || node.ID == resourceID {
if node.LinkedHostAgentID != "" {
log.Debug().
Str("from_host", hostID).
Str("to_agent", node.LinkedHostAgentID).
Msg("Redirecting discovery lookup to linked host agent")
hostID = node.LinkedHostAgentID
resourceID = node.LinkedHostAgentID
redirected = true
}
break
}
}
}
d, err := s.store.GetByResource(resourceType, hostID, resourceID)
// If redirected and not found, try the original ID (fallback for unmigrated data)
if (err != nil || d == nil) && redirected {
log.Debug().
Str("redirected_host", hostID).
Str("original_host", originalHostID).
Msg("Redirected lookup failed, trying fallback to original ID")
dOriginal, errOriginal := s.store.GetByResource(resourceType, originalHostID, originalResourceID)
if errOriginal == nil && dOriginal != nil {
log.Debug().
Str("original_host", originalHostID).
Msg("Fallback lookup succeeded - returning legacy discovery")
s.upgradeCLIAccessIfNeeded(dOriginal)
return dOriginal, nil
}
}
if err != nil || d == nil {
return d, err
}
s.upgradeCLIAccessIfNeeded(d)
return d, nil
}
// ListDiscoveries returns all discoveries.
func (s *Service) ListDiscoveries() ([]*ResourceDiscovery, error) {
discoveries, err := s.store.List()
if err != nil {
return nil, err
}
discoveries = s.deduplicateDiscoveries(discoveries)
for _, d := range discoveries {
s.upgradeCLIAccessIfNeeded(d)
}
return discoveries, nil
}
// ListDiscoveriesByType returns discoveries for a specific resource type.
func (s *Service) ListDiscoveriesByType(resourceType ResourceType) ([]*ResourceDiscovery, error) {
discoveries, err := s.store.ListByType(resourceType)
if err != nil {
return nil, err
}
discoveries = s.deduplicateDiscoveries(discoveries)
for _, d := range discoveries {
s.upgradeCLIAccessIfNeeded(d)
}
return discoveries, nil
}
// ListDiscoveriesByHost returns discoveries for a specific host.
func (s *Service) ListDiscoveriesByHost(hostID string) ([]*ResourceDiscovery, error) {
discoveries, err := s.store.ListByHost(hostID)
if err != nil {
return nil, err
}
discoveries = s.deduplicateDiscoveries(discoveries)
for _, d := range discoveries {
s.upgradeCLIAccessIfNeeded(d)
}
return discoveries, nil
}
// deduplicateDiscoveries filters out redundant discoveries where a PVE node
// is represented by both its Node Name and its Linked Host Agent ID.
// The Host Agent ID is preferred.
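// For example (illustrative IDs), if PVE node "pve1" is linked to host agent
// "agent-abc" and both have host discoveries, only the "agent-abc" entry is kept.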
func (s *Service) deduplicateDiscoveries(discoveries []*ResourceDiscovery) []*ResourceDiscovery {
if s.stateProvider == nil {
return discoveries
}
state := s.stateProvider.GetState()
if len(state.Nodes) == 0 {
return discoveries
}
// Map linked agent IDs to their PVE node source(s)
// AgentID -> NodeName
linkedAgents := make(map[string]string)
for _, node := range state.Nodes {
if node.LinkedHostAgentID != "" {
linkedAgents[node.LinkedHostAgentID] = node.Name
}
}
if len(linkedAgents) == 0 {
return discoveries
}
// Check which agents actually have discovery data
hasAgentDiscovery := make(map[string]bool)
for _, d := range discoveries {
if d.ResourceType == ResourceTypeHost {
// d.HostID is usually the agent ID for host resources
if _, ok := linkedAgents[d.HostID]; ok {
hasAgentDiscovery[d.HostID] = true
}
}
}
// Filter out PVE node discoveries if the corresponding agent discovery exists
filtered := make([]*ResourceDiscovery, 0, len(discoveries))
for _, d := range discoveries {
if d.ResourceType == ResourceTypeHost {
// If this discovery is for a PVE node (matched by name or ID), check whether it
// maps to a linked agent that already has a discovery in this list.
isPVENode := false
var linkedAgentID string
for _, node := range state.Nodes {
if d.HostID == node.Name || d.HostID == node.ID || d.ResourceID == node.Name {
isPVENode = true
linkedAgentID = node.LinkedHostAgentID
break
}
}
if isPVENode && linkedAgentID != "" && hasAgentDiscovery[linkedAgentID] && d.HostID != linkedAgentID {
// We have the agent discovery, so skip this redundant PVE node discovery
continue
}
}
filtered = append(filtered, d)
}
return filtered
}
// upgradeCLIAccessIfNeeded upgrades cached discovery fields to current versions.
// This ensures cached discoveries get the new instructional CLI access format
// and have hostname populated without requiring a full re-discovery.
func (s *Service) upgradeCLIAccessIfNeeded(d *ResourceDiscovery) {
if d == nil {
return
}
upgraded := false
// Upgrade CLI access if version is outdated
if d.CLIAccessVersion < CLIAccessVersion {
oldCLI := d.CLIAccess
d.CLIAccess = GetCLIAccessTemplate(d.ResourceType)
d.CLIAccessVersion = CLIAccessVersion
upgraded = true
log.Debug().
Str("id", d.ID).
Str("old_cli", oldCLI).
Str("new_cli", d.CLIAccess).
Int("new_version", CLIAccessVersion).
Msg("Upgraded CLI access pattern to new version")
}
// Fix empty hostname by looking up the resource name from state
if d.Hostname == "" && s.stateProvider != nil {
state := s.stateProvider.GetState()
hostname := s.lookupHostnameFromState(d.ResourceType, d.HostID, d.ResourceID, state)
if hostname != "" {
d.Hostname = hostname
upgraded = true
log.Debug().
Str("id", d.ID).
Str("hostname", hostname).
Msg("Populated missing hostname from state")
}
}
_ = upgraded // upgraded is only ever written above; this blank read avoids a "declared and not used" compile error
}
// lookupHostnameFromState finds the hostname/name for a resource from state
func (s *Service) lookupHostnameFromState(resourceType ResourceType, hostID, resourceID string, state StateSnapshot) string {
switch resourceType {
case ResourceTypeLXC:
for _, c := range state.Containers {
if fmt.Sprintf("%d", c.VMID) == resourceID && c.Node == hostID {
return c.Name
}
}
case ResourceTypeVM:
for _, vm := range state.VMs {
if fmt.Sprintf("%d", vm.VMID) == resourceID && vm.Node == hostID {
return vm.Name
}
}
case ResourceTypeDocker:
for _, host := range state.DockerHosts {
if host.AgentID == hostID || host.Hostname == hostID {
for _, c := range host.Containers {
if c.Name == resourceID {
return host.Hostname
}
}
}
}
}
return ""
}
// UpdateNotes updates user notes for a discovery.
func (s *Service) UpdateNotes(id string, notes string, secrets map[string]string) error {
return s.store.UpdateNotes(id, notes, secrets)
}
// DeleteDiscovery deletes a discovery.
func (s *Service) DeleteDiscovery(id string) error {
return s.store.Delete(id)
}
// GetProgress returns the progress of an ongoing discovery.
func (s *Service) GetProgress(resourceID string) *DiscoveryProgress {
if s.scanner == nil {
return nil
}
return s.scanner.GetProgress(resourceID)
}
// GetStatus returns the service status including fingerprint statistics.
func (s *Service) GetStatus() map[string]any {
s.mu.RLock()
defer s.mu.RUnlock()
s.cacheMu.RLock()
cacheSize := len(s.analysisCache)
s.cacheMu.RUnlock()
// Get fingerprint stats
fingerprintCount := 0
var lastFingerprintScan time.Time
if s.store != nil {
fingerprintCount = s.store.GetFingerprintCount()
lastFingerprintScan = s.store.GetLastFingerprintScan()
}
return map[string]any{
"running": s.running,
"last_run": s.lastRun,
"interval": s.interval.String(),
"cache_size": cacheSize,
"ai_analyzer_set": s.aiAnalyzer != nil,
"scanner_set": s.scanner != nil,
"store_set": s.store != nil,
"deep_scan_timeout": s.deepScanTimeout.String(),
"max_discovery_age": s.maxDiscoveryAge.String(),
"fingerprint_count": fingerprintCount,
"last_fingerprint_scan": lastFingerprintScan,
}
}
// GetMaxDiscoveryAge returns the current max discovery age (staleness threshold).
func (s *Service) GetMaxDiscoveryAge() time.Duration {
s.mu.RLock()
defer s.mu.RUnlock()
return s.maxDiscoveryAge
}
// SetMaxDiscoveryAge updates the max discovery age (staleness threshold).
// Discoveries older than this duration will be re-run when requested.
func (s *Service) SetMaxDiscoveryAge(age time.Duration) {
s.mu.Lock()
defer s.mu.Unlock()
// Enforce minimum of 1 day
if age < 24*time.Hour {
age = 24 * time.Hour
}
s.maxDiscoveryAge = age
log.Info().Dur("max_discovery_age", age).Msg("Max discovery age updated")
}
// ClearCache clears the AI analysis cache.
func (s *Service) ClearCache() {
s.cacheMu.Lock()
defer s.cacheMu.Unlock()
s.analysisCache = make(map[string]*analysisCacheEntry)
}
// --- AI Chat Integration Methods ---
// GetDiscoveryForAIChat returns discovery data for AI chat context.
// It will run discovery if needed (fingerprint changed or no data exists).
// This is the just-in-time discovery approach: only call AI when data is actually needed.
func (s *Service) GetDiscoveryForAIChat(ctx context.Context, resourceType ResourceType, hostID, resourceID string) (*ResourceDiscovery, error) {
// This is the same as DiscoverResource but without Force
return s.DiscoverResource(ctx, DiscoveryRequest{
ResourceType: resourceType,
ResourceID: resourceID,
HostID: hostID,
Force: false, // Let fingerprint logic decide
})
}
// GetDiscoveriesForAIContext returns discoveries for multiple resources.
// Used when AI chat needs context about the infrastructure.
// Only runs discovery for resources that actually need it (fingerprint changed).
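// Each ID is the "type:host:id" string produced by MakeResourceID,
// e.g. "lxc:pve1:105" (illustrative values).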
func (s *Service) GetDiscoveriesForAIContext(ctx context.Context, resourceIDs []string) ([]*ResourceDiscovery, error) {
var results []*ResourceDiscovery
for _, id := range resourceIDs {
resourceType, hostID, resourceID, err := ParseResourceID(id)
if err != nil {
log.Debug().Err(err).Str("id", id).Msg("Failed to parse resource ID for AI context")
continue
}
discovery, err := s.GetDiscoveryForAIChat(ctx, resourceType, hostID, resourceID)
if err != nil {
log.Debug().Err(err).Str("id", id).Msg("Failed to get discovery for AI context")
continue
}
if discovery != nil {
results = append(results, discovery)
}
}
return results, nil
}
// GetChangedResourceCount returns the count of resources whose fingerprint has changed
// since their last discovery.
func (s *Service) GetChangedResourceCount() (int, error) {
if s.store == nil {
return 0, nil
}
changed, err := s.store.GetChangedResources()
if err != nil {
return 0, err
}
return len(changed), nil
}
// GetStaleResourceCount returns the count of resources whose discovery is older
// than maxDiscoveryAge.
func (s *Service) GetStaleResourceCount() (int, error) {
if s.store == nil {
return 0, nil
}
stale, err := s.store.GetStaleResources(s.maxDiscoveryAge)
if err != nil {
return 0, err
}
return len(stale), nil
}