// Package servicediscovery provides infrastructure discovery capabilities.
// It discovers services, versions, configurations, and CLI access methods
// for VMs, LXCs, Docker containers, Kubernetes pods, and hosts.
package servicediscovery

import (
	"context"
	"encoding/json"
	"fmt"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/rs/zerolog/log"
)

// sensitiveKeyPatterns defines patterns that indicate a label/env key might contain secrets.
// These patterns are case-insensitive and match if any part of the key contains them.
var sensitiveKeyPatterns = []string{
	"password", "passwd", "pwd",
	"secret",
	"key", "apikey", "api_key",
	"token",
	"credential", "cred",
	"auth",
	"private",
	"cert",
}

// filterSensitiveLabels removes or redacts labels that may contain sensitive values.
// It returns a new map with sensitive values replaced with "[REDACTED]".
// Keys are checked case-insensitively for sensitive patterns.
func filterSensitiveLabels(labels map[string]string) map[string]string {
	if labels == nil {
		return nil
	}

	filtered := make(map[string]string, len(labels))
	redactedCount := 0

	for key, value := range labels {
		keyLower := strings.ToLower(key)
		isSensitive := false

		for _, pattern := range sensitiveKeyPatterns {
			if strings.Contains(keyLower, pattern) {
				isSensitive = true
				break
			}
		}

		if isSensitive {
			filtered[key] = "[REDACTED]"
			redactedCount++
		} else {
			filtered[key] = value
		}
	}

	if redactedCount > 0 {
		log.Debug().
			Int("redacted_count", redactedCount).
			Int("total_labels", len(labels)).
			Msg("Redacted sensitive labels before AI analysis")
	}

	return filtered
}
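
// Illustrative usage sketch (not part of the original source): how label
// redaction behaves before container metadata is embedded in an AI prompt.
// The label keys and values below are hypothetical.
//
//	labels := map[string]string{
//		"org.opencontainers.image.title": "grafana",
//		"com.example.api_key":            "abc123",
//	}
//	safe := filterSensitiveLabels(labels)
//	// safe["com.example.api_key"] == "[REDACTED]" (the key contains "key")
//	// the image-title label passes through unchanged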

// StateProvider provides access to the current infrastructure state.
type StateProvider interface {
	GetState() StateSnapshot
}

// StateSnapshot represents the infrastructure state. This mirrors models.StateSnapshot
// to avoid circular dependencies.
type StateSnapshot struct {
	VMs                []VM
	Containers         []Container
	DockerHosts        []DockerHost
	KubernetesClusters []KubernetesCluster
	Hosts              []Host
	Nodes              []Node
}

// Node represents a Proxmox VE node.
type Node struct {
	ID                string
	Name              string
	LinkedHostAgentID string
}

// Host represents a host system (via host-agent).
type Host struct {
	ID            string
	Hostname      string
	DisplayName   string
	Platform      string // e.g., "linux", "darwin", "windows"
	OSName        string // e.g., "Unraid", "Ubuntu", "Debian"
	OSVersion     string
	KernelVersion string
	Architecture  string // e.g., "amd64", "arm64"
	CPUCount      int
	Status        string
	Tags          []string
}

// VM represents a virtual machine.
type VM struct {
	VMID     int
	Name     string
	Node     string
	Status   string
	Instance string
	// Additional metadata for fingerprinting
	CPUs        int      // Number of CPU cores
	MaxMemory   uint64   // Max memory in bytes
	MaxDisk     uint64   // Max disk in bytes
	Tags        []string // User-defined tags
	OSName      string   // Detected OS name
	OSVersion   string   // OS version string
	IPAddresses []string // IP addresses assigned to the VM
	Template    bool     // True if this is a template
}

// Container represents an LXC container.
type Container struct {
	VMID     int
	Name     string
	Node     string
	Status   string
	Instance string
	// Additional metadata for fingerprinting
	CPUs        int      // Number of CPU cores
	MaxMemory   uint64   // Max memory in bytes
	MaxDisk     uint64   // Max disk in bytes
	Tags        []string // User-defined tags
	OSTemplate  string   // Template or OCI image used
	OSName      string   // Detected OS name
	IsOCI       bool     // True if OCI container (Proxmox 9.1+)
	IPAddresses []string // IP addresses assigned to the container
	Template    bool     // True if this is a template
}

// DockerHost represents a Docker host.
type DockerHost struct {
	AgentID    string
	Hostname   string
	Containers []DockerContainer
}

// DockerContainer represents a Docker container.
type DockerContainer struct {
	ID     string
	Name   string
	Image  string
	Status string
	Ports  []DockerPort
	Labels map[string]string
	Mounts []DockerMount
}

// DockerPort represents a port mapping.
type DockerPort struct {
	PublicPort  int
	PrivatePort int
	Protocol    string
}

// DockerMount represents a mount point.
type DockerMount struct {
	Source      string
	Destination string
}

// KubernetesCluster represents a Kubernetes cluster.
type KubernetesCluster struct {
	ID      string
	Name    string
	AgentID string
	Status  string
	Pods    []KubernetesPod
}

// KubernetesPod represents a Kubernetes pod.
type KubernetesPod struct {
	UID        string
	Name       string
	Namespace  string
	NodeName   string
	Phase      string
	Labels     map[string]string
	OwnerKind  string // e.g., "Deployment", "StatefulSet", "DaemonSet"
	OwnerName  string
	Containers []KubernetesPodContainer
}

// KubernetesPodContainer represents a container within a Kubernetes pod.
type KubernetesPodContainer struct {
	Name         string
	Image        string
	Ready        bool
	RestartCount int32
	State        string // e.g., "running", "waiting", "terminated"
}

// AIAnalyzer provides AI analysis capabilities for discovery.
type AIAnalyzer interface {
	AnalyzeForDiscovery(ctx context.Context, prompt string) (string, error)
}

// WSMessage represents a WebSocket message for broadcasting.
type WSMessage struct {
	Type string      `json:"type"`
	Data interface{} `json:"data"`
}

// WSBroadcaster provides WebSocket broadcasting capabilities.
type WSBroadcaster interface {
	BroadcastDiscoveryProgress(progress *DiscoveryProgress)
}

// Service manages infrastructure discovery.
type Service struct {
	store         *Store
	scanner       *DeepScanner
	stateProvider StateProvider
	aiAnalyzer    AIAnalyzer
	wsHub         WSBroadcaster // WebSocket hub for broadcasting progress

	mu              sync.RWMutex
	running         bool
	stopCh          chan struct{}
	intervalCh      chan time.Duration // Channel for live interval updates
	interval        time.Duration
	initialDelay    time.Duration
	lastRun         time.Time
	deepScanTimeout time.Duration // Timeout for individual deep scans
	maxDiscoveryAge time.Duration // Max age before rediscovery (default 30 days)

	// Cache for AI analysis results (by image name)
	analysisCache map[string]*analysisCacheEntry
	cacheMu       sync.RWMutex
	cacheExpiry   time.Duration

	// In-progress discovery tracking (prevents duplicate concurrent discoveries)
	inProgressMu sync.Mutex
	inProgress   map[string]*discoveryInProgress
}

// discoveryInProgress tracks an ongoing discovery operation.
// Multiple callers can wait on the done channel for completion.
type discoveryInProgress struct {
	done   chan struct{}      // Closed when discovery completes
	result *ResourceDiscovery // Result after completion
	err    error              // Error after completion
}

// analysisCacheEntry holds a cached AI analysis result with its timestamp.
type analysisCacheEntry struct {
	result   *AIAnalysisResponse
	cachedAt time.Time
}

// Config holds discovery service configuration.
type Config struct {
	DataDir         string
	Interval        time.Duration // How often to run fingerprint collection (default 5 min)
	CacheExpiry     time.Duration // How long to cache AI analysis results
	DeepScanTimeout time.Duration // Timeout for individual deep scans (default 60s)

	// Fingerprint-based discovery settings
	MaxDiscoveryAge     time.Duration // Rediscover after this duration (default 30 days)
	FingerprintInterval time.Duration // How often to collect fingerprints (default 5 min)
}

// DefaultConfig returns the default discovery configuration.
func DefaultConfig() Config {
	return Config{
		Interval:            5 * time.Minute, // Fingerprint collection interval
		CacheExpiry:         1 * time.Hour,
		DeepScanTimeout:     60 * time.Second,
		MaxDiscoveryAge:     30 * 24 * time.Hour, // 30 days
		FingerprintInterval: 5 * time.Minute,
	}
}
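
// Illustrative configuration sketch (not part of the original source): start
// from the defaults and override individual fields. The values below are
// arbitrary.
//
//	cfg := DefaultConfig()
//	cfg.Interval = 10 * time.Minute       // collect fingerprints less often
//	cfg.DeepScanTimeout = 2 * time.Minute // allow slower hosts more time per scan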

// NewService creates a new discovery service.
func NewService(store *Store, scanner *DeepScanner, stateProvider StateProvider, cfg Config) *Service {
	if cfg.Interval == 0 {
		cfg.Interval = 5 * time.Minute
	}
	if cfg.CacheExpiry == 0 {
		cfg.CacheExpiry = 1 * time.Hour
	}
	if cfg.DeepScanTimeout == 0 {
		cfg.DeepScanTimeout = 60 * time.Second
	}
	if cfg.MaxDiscoveryAge == 0 {
		cfg.MaxDiscoveryAge = 30 * 24 * time.Hour // 30 days
	}

	return &Service{
		store:           store,
		scanner:         scanner,
		stateProvider:   stateProvider,
		interval:        cfg.Interval,
		initialDelay:    30 * time.Second,
		cacheExpiry:     cfg.CacheExpiry,
		deepScanTimeout: cfg.DeepScanTimeout,
		maxDiscoveryAge: cfg.MaxDiscoveryAge,
		stopCh:          make(chan struct{}),
		intervalCh:      make(chan time.Duration, 1), // Buffered to prevent blocking
		analysisCache:   make(map[string]*analysisCacheEntry),
		inProgress:      make(map[string]*discoveryInProgress),
	}
}
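
// Illustrative wiring sketch (not part of the original source): the store,
// scanner, state provider, analyzer, and hub are assumed to be constructed
// elsewhere in the application.
//
//	svc := NewService(store, scanner, stateProvider, DefaultConfig())
//	svc.SetAIAnalyzer(analyzer) // required for AI-backed discovery
//	svc.SetWSHub(hub)           // optional; enables progress broadcasts
//	svc.Start(ctx)
//	defer svc.Stop()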
|
|
|
|
// SetAIAnalyzer sets the AI analyzer for discovery.
|
|
func (s *Service) SetAIAnalyzer(analyzer AIAnalyzer) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
s.aiAnalyzer = analyzer
|
|
}
|
|
|
|
// Start begins the background discovery service.
|
|
func (s *Service) Start(ctx context.Context) {
|
|
s.mu.Lock()
|
|
if s.running {
|
|
s.mu.Unlock()
|
|
return
|
|
}
|
|
s.running = true
|
|
s.stopCh = make(chan struct{})
|
|
s.mu.Unlock()
|
|
|
|
log.Info().
|
|
Dur("interval", s.interval).
|
|
Msg("Starting infrastructure discovery service")
|
|
|
|
go s.discoveryLoop(ctx)
|
|
}
|
|
|
|
// Stop stops the background discovery service.
|
|
func (s *Service) Stop() {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if s.running {
|
|
close(s.stopCh)
|
|
s.running = false
|
|
}
|
|
}
|
|
|
|
// SetInterval updates the scan interval. Takes effect immediately if running.
|
|
func (s *Service) SetInterval(interval time.Duration) {
|
|
s.mu.Lock()
|
|
s.interval = interval
|
|
running := s.running
|
|
s.mu.Unlock()
|
|
|
|
// If running, send the new interval to the loop (non-blocking)
|
|
if running {
|
|
select {
|
|
case s.intervalCh <- interval:
|
|
log.Info().Dur("interval", interval).Msg("Discovery interval updated (live)")
|
|
default:
|
|
// Channel full, interval will be picked up eventually
|
|
log.Debug().Dur("interval", interval).Msg("Discovery interval updated (pending)")
|
|
}
|
|
}
|
|
}
|
|
|
|
// needsDeepScan determines if a discovery result needs a deep scan based on quality.
|
|
// Returns true if the discovery is incomplete or low-confidence.
|
|
func (s *Service) needsDeepScan(discovery *ResourceDiscovery) bool {
|
|
if discovery == nil {
|
|
return true // No discovery at all
|
|
}
|
|
|
|
// Already has deep scan data (raw command outputs)
|
|
if len(discovery.RawCommandOutput) > 0 {
|
|
return false
|
|
}
|
|
|
|
// Low confidence - needs more investigation
|
|
if discovery.Confidence < 0.7 {
|
|
return true
|
|
}
|
|
|
|
// Unknown service type
|
|
if discovery.ServiceType == "" || discovery.ServiceType == "unknown" {
|
|
return true
|
|
}
|
|
|
|
// Missing key paths that deep scan could discover
|
|
if len(discovery.Facts) == 0 && len(discovery.ConfigPaths) == 0 && len(discovery.LogPaths) == 0 {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
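
// Illustrative note (not part of the original source): a discovery can carry a
// known service type and high confidence and still qualify for a deep scan
// when it has no facts or paths yet.
//
//	d := &ResourceDiscovery{ServiceType: "postgres", Confidence: 0.9}
//	// s.needsDeepScan(d) == true: no Facts, ConfigPaths, or LogPaths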
|
|
|
|
// SetWSHub sets the WebSocket hub for broadcasting progress updates.
|
|
func (s *Service) SetWSHub(hub WSBroadcaster) {
|
|
s.mu.Lock()
|
|
s.wsHub = hub
|
|
s.mu.Unlock()
|
|
|
|
// Wire up the scanner's progress callback to broadcast via WebSocket
|
|
if s.scanner != nil {
|
|
s.scanner.SetProgressCallback(s.broadcastProgress)
|
|
}
|
|
|
|
log.Info().Msg("WebSocket hub connected to discovery service")
|
|
}
|
|
|
|
// broadcastProgress broadcasts discovery progress to all WebSocket clients.
|
|
func (s *Service) broadcastProgress(progress *DiscoveryProgress) {
|
|
s.mu.RLock()
|
|
hub := s.wsHub
|
|
s.mu.RUnlock()
|
|
|
|
if hub == nil || progress == nil {
|
|
return
|
|
}
|
|
|
|
hub.BroadcastDiscoveryProgress(progress)
|
|
}
|
|
|
|
// IsRunning returns whether the background discovery loop is active.
|
|
func (s *Service) IsRunning() bool {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
return s.running
|
|
}
|
|
|
|
// discoveryLoop runs periodic fingerprint collection and automatic refreshes.
|
|
// Fingerprints detect changes cheaply; changed/stale/new resources are then refreshed.
|
|
func (s *Service) discoveryLoop(ctx context.Context) {
|
|
delay := s.initialDelay
|
|
if delay <= 0 {
|
|
delay = 30 * time.Second
|
|
}
|
|
|
|
// Run initial fingerprint collection after a short delay
|
|
select {
|
|
case <-time.After(delay):
|
|
case <-s.stopCh:
|
|
return
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
|
|
s.collectFingerprints(ctx)
|
|
s.runAutomaticDiscoveryRefresh(ctx)
|
|
|
|
s.mu.RLock()
|
|
currentInterval := s.interval
|
|
s.mu.RUnlock()
|
|
|
|
ticker := time.NewTicker(currentInterval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
s.collectFingerprints(ctx)
|
|
s.runAutomaticDiscoveryRefresh(ctx)
|
|
case newInterval := <-s.intervalCh:
|
|
// Interval changed - reset the ticker
|
|
ticker.Stop()
|
|
ticker = time.NewTicker(newInterval)
|
|
log.Info().Dur("interval", newInterval).Msg("Fingerprint collection interval reset")
|
|
case <-s.stopCh:
|
|
log.Info().Msg("Stopping discovery service")
|
|
return
|
|
case <-ctx.Done():
|
|
log.Info().Msg("Discovery context cancelled")
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Service) runAutomaticDiscoveryRefresh(ctx context.Context) {
|
|
if ctx == nil || ctx.Err() != nil || s.store == nil {
|
|
return
|
|
}
|
|
|
|
s.mu.RLock()
|
|
analyzerConfigured := s.aiAnalyzer != nil
|
|
maxDiscoveryAge := s.maxDiscoveryAge
|
|
s.mu.RUnlock()
|
|
if !analyzerConfigured {
|
|
log.Debug().Msg("Skipping automatic discovery refresh - AI analyzer not configured")
|
|
return
|
|
}
|
|
|
|
changedResources, err := s.store.GetChangedResources()
|
|
if err != nil {
|
|
log.Warn().Err(err).Msg("Failed to fetch changed resources for automatic discovery refresh")
|
|
return
|
|
}
|
|
staleResources, err := s.store.GetStaleResources(maxDiscoveryAge)
|
|
if err != nil {
|
|
log.Warn().Err(err).Msg("Failed to fetch stale resources for automatic discovery refresh")
|
|
return
|
|
}
|
|
|
|
candidates := make(map[string]struct{}, len(changedResources)+len(staleResources))
|
|
for _, id := range changedResources {
|
|
if strings.TrimSpace(id) != "" {
|
|
candidates[id] = struct{}{}
|
|
}
|
|
}
|
|
for _, id := range staleResources {
|
|
if strings.TrimSpace(id) != "" {
|
|
candidates[id] = struct{}{}
|
|
}
|
|
}
|
|
if len(candidates) == 0 {
|
|
return
|
|
}
|
|
|
|
resourceIDs := make([]string, 0, len(candidates))
|
|
for id := range candidates {
|
|
resourceIDs = append(resourceIDs, id)
|
|
}
|
|
sort.Strings(resourceIDs)
|
|
|
|
log.Info().
|
|
Int("changed", len(changedResources)).
|
|
Int("stale", len(staleResources)).
|
|
Int("total", len(resourceIDs)).
|
|
Msg("Running automatic discovery refresh for changed/stale resources")
|
|
|
|
discoveredCount := 0
|
|
failedCount := 0
|
|
|
|
for _, id := range resourceIDs {
|
|
if ctx.Err() != nil {
|
|
break
|
|
}
|
|
|
|
resourceType, hostID, resourceID, err := ParseResourceID(id)
|
|
if err != nil {
|
|
failedCount++
|
|
log.Warn().
|
|
Err(err).
|
|
Str("resource_id", id).
|
|
Msg("Skipping invalid resource ID during automatic discovery refresh")
|
|
continue
|
|
}
|
|
|
|
_, err = s.DiscoverResource(ctx, DiscoveryRequest{
|
|
ResourceType: resourceType,
|
|
ResourceID: resourceID,
|
|
HostID: hostID,
|
|
Hostname: hostID,
|
|
})
|
|
if err != nil {
|
|
failedCount++
|
|
log.Warn().
|
|
Err(err).
|
|
Str("resource_id", id).
|
|
Str("resource_type", string(resourceType)).
|
|
Msg("Automatic discovery refresh failed for resource")
|
|
continue
|
|
}
|
|
discoveredCount++
|
|
}
|
|
|
|
log.Info().
|
|
Int("discovered", discoveredCount).
|
|
Int("failed", failedCount).
|
|
Msg("Automatic discovery refresh completed")
|
|
}
|
|
|
|
// collectFingerprints collects fingerprints from all resources (Docker, LXC, VM).
|
|
// This is metadata-only and does not invoke the AI analyzer.
|
|
func (s *Service) collectFingerprints(ctx context.Context) {
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
log.Error().Interface("panic", r).Stack().Msg("Recovered from panic in fingerprint collection")
|
|
}
|
|
}()
|
|
|
|
s.mu.Lock()
|
|
s.lastRun = time.Now()
|
|
s.mu.Unlock()
|
|
|
|
if s.stateProvider == nil {
|
|
return
|
|
}
|
|
|
|
state := s.stateProvider.GetState()
|
|
changedCount := 0
|
|
newCount := 0
|
|
|
|
// Process Docker containers
|
|
for _, host := range state.DockerHosts {
|
|
for _, container := range host.Containers {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
|
|
// Generate new fingerprint (prefixed with docker: to avoid collisions)
|
|
newFP := GenerateDockerFingerprint(host.AgentID, &container)
|
|
fpKey := "docker:" + host.AgentID + ":" + newFP.ResourceID
|
|
|
|
// Get previous fingerprint
|
|
oldFP, _ := s.store.GetFingerprint(fpKey)
|
|
|
|
// Update the fingerprint's ResourceID to include prefix for storage
|
|
newFP.ResourceID = fpKey
|
|
|
|
// Save new fingerprint
|
|
if err := s.store.SaveFingerprint(newFP); err != nil {
|
|
log.Warn().Err(err).Str("container", container.Name).Msg("Failed to save Docker fingerprint")
|
|
continue
|
|
}
|
|
|
|
// Check if this is new or changed
|
|
if oldFP == nil {
|
|
newCount++
|
|
log.Debug().
|
|
Str("type", "docker").
|
|
Str("container", container.Name).
|
|
Str("hash", newFP.Hash).
|
|
Msg("New fingerprint captured")
|
|
} else if newFP.HasSchemaChanged(oldFP) {
|
|
// Schema changed - don't count as "changed" to avoid mass rediscovery
|
|
log.Debug().
|
|
Str("type", "docker").
|
|
Str("container", container.Name).
|
|
Int("old_schema", oldFP.SchemaVersion).
|
|
Int("new_schema", newFP.SchemaVersion).
|
|
Msg("Fingerprint schema updated")
|
|
} else if oldFP.Hash != newFP.Hash {
|
|
changedCount++
|
|
log.Info().
|
|
Str("type", "docker").
|
|
Str("container", container.Name).
|
|
Str("old_hash", oldFP.Hash).
|
|
Str("new_hash", newFP.Hash).
|
|
Msg("Fingerprint changed - discovery will run on next request")
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process LXC containers
|
|
for _, lxc := range state.Containers {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
|
|
// Generate new fingerprint
|
|
newFP := GenerateLXCFingerprint(lxc.Node, &lxc)
|
|
fpKey := "lxc:" + lxc.Node + ":" + newFP.ResourceID
|
|
|
|
// Get previous fingerprint
|
|
oldFP, _ := s.store.GetFingerprint(fpKey)
|
|
|
|
// Update the fingerprint's ResourceID to include prefix for storage
|
|
newFP.ResourceID = fpKey
|
|
|
|
// Save new fingerprint
|
|
if err := s.store.SaveFingerprint(newFP); err != nil {
|
|
log.Warn().Err(err).Str("lxc", lxc.Name).Msg("Failed to save LXC fingerprint")
|
|
continue
|
|
}
|
|
|
|
// Check if this is new or changed
|
|
if oldFP == nil {
|
|
newCount++
|
|
log.Debug().
|
|
Str("type", "lxc").
|
|
Str("name", lxc.Name).
|
|
Int("vmid", lxc.VMID).
|
|
Str("hash", newFP.Hash).
|
|
Msg("New fingerprint captured")
|
|
} else if newFP.HasSchemaChanged(oldFP) {
|
|
log.Debug().
|
|
Str("type", "lxc").
|
|
Str("name", lxc.Name).
|
|
Int("vmid", lxc.VMID).
|
|
Int("old_schema", oldFP.SchemaVersion).
|
|
Int("new_schema", newFP.SchemaVersion).
|
|
Msg("Fingerprint schema updated")
|
|
} else if oldFP.Hash != newFP.Hash {
|
|
changedCount++
|
|
log.Info().
|
|
Str("type", "lxc").
|
|
Str("name", lxc.Name).
|
|
Int("vmid", lxc.VMID).
|
|
Str("old_hash", oldFP.Hash).
|
|
Str("new_hash", newFP.Hash).
|
|
Msg("Fingerprint changed - discovery will run on next request")
|
|
}
|
|
}
|
|
|
|
// Process VMs
|
|
for _, vm := range state.VMs {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
|
|
// Generate new fingerprint
|
|
newFP := GenerateVMFingerprint(vm.Node, &vm)
|
|
fpKey := "vm:" + vm.Node + ":" + newFP.ResourceID
|
|
|
|
// Get previous fingerprint
|
|
oldFP, _ := s.store.GetFingerprint(fpKey)
|
|
|
|
// Update the fingerprint's ResourceID to include prefix for storage
|
|
newFP.ResourceID = fpKey
|
|
|
|
// Save new fingerprint
|
|
if err := s.store.SaveFingerprint(newFP); err != nil {
|
|
log.Warn().Err(err).Str("vm", vm.Name).Msg("Failed to save VM fingerprint")
|
|
continue
|
|
}
|
|
|
|
// Check if this is new or changed
|
|
if oldFP == nil {
|
|
newCount++
|
|
log.Debug().
|
|
Str("type", "vm").
|
|
Str("name", vm.Name).
|
|
Int("vmid", vm.VMID).
|
|
Str("hash", newFP.Hash).
|
|
Msg("New fingerprint captured")
|
|
} else if newFP.HasSchemaChanged(oldFP) {
|
|
log.Debug().
|
|
Str("type", "vm").
|
|
Str("name", vm.Name).
|
|
Int("vmid", vm.VMID).
|
|
Int("old_schema", oldFP.SchemaVersion).
|
|
Int("new_schema", newFP.SchemaVersion).
|
|
Msg("Fingerprint schema updated")
|
|
} else if oldFP.Hash != newFP.Hash {
|
|
changedCount++
|
|
log.Info().
|
|
Str("type", "vm").
|
|
Str("name", vm.Name).
|
|
Int("vmid", vm.VMID).
|
|
Str("old_hash", oldFP.Hash).
|
|
Str("new_hash", newFP.Hash).
|
|
Msg("Fingerprint changed - discovery will run on next request")
|
|
}
|
|
}
|
|
|
|
// Process Kubernetes pods
|
|
for _, cluster := range state.KubernetesClusters {
|
|
for _, pod := range cluster.Pods {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
|
|
// Generate new fingerprint
|
|
newFP := GenerateK8sPodFingerprint(cluster.ID, &pod)
|
|
fpKey := "k8s:" + cluster.ID + ":" + pod.Namespace + "/" + pod.Name
|
|
|
|
// Get previous fingerprint
|
|
oldFP, _ := s.store.GetFingerprint(fpKey)
|
|
|
|
// Update the fingerprint's ResourceID to include prefix for storage
|
|
newFP.ResourceID = fpKey
|
|
|
|
// Save new fingerprint
|
|
if err := s.store.SaveFingerprint(newFP); err != nil {
|
|
log.Warn().Err(err).Str("pod", pod.Name).Str("namespace", pod.Namespace).Msg("Failed to save K8s pod fingerprint")
|
|
continue
|
|
}
|
|
|
|
// Check if this is new or changed
|
|
if oldFP == nil {
|
|
newCount++
|
|
log.Debug().
|
|
Str("type", "k8s").
|
|
Str("name", pod.Name).
|
|
Str("namespace", pod.Namespace).
|
|
Str("cluster", cluster.Name).
|
|
Str("hash", newFP.Hash).
|
|
Msg("New fingerprint captured")
|
|
} else if newFP.HasSchemaChanged(oldFP) {
|
|
log.Debug().
|
|
Str("type", "k8s").
|
|
Str("name", pod.Name).
|
|
Str("namespace", pod.Namespace).
|
|
Str("cluster", cluster.Name).
|
|
Int("old_schema", oldFP.SchemaVersion).
|
|
Int("new_schema", newFP.SchemaVersion).
|
|
Msg("Fingerprint schema updated")
|
|
} else if oldFP.Hash != newFP.Hash {
|
|
changedCount++
|
|
log.Info().
|
|
Str("type", "k8s").
|
|
Str("name", pod.Name).
|
|
Str("namespace", pod.Namespace).
|
|
Str("cluster", cluster.Name).
|
|
Str("old_hash", oldFP.Hash).
|
|
Str("new_hash", newFP.Hash).
|
|
Msg("Fingerprint changed - discovery will run on next request")
|
|
}
|
|
}
|
|
}
|
|
|
|
// Update last scan time
|
|
s.store.SetLastFingerprintScan(time.Now())
|
|
|
|
if newCount > 0 || changedCount > 0 {
|
|
log.Info().
|
|
Int("new", newCount).
|
|
Int("changed", changedCount).
|
|
Int("total", s.store.GetFingerprintCount()).
|
|
Msg("Fingerprint collection complete")
|
|
} else {
|
|
log.Debug().
|
|
Int("total", s.store.GetFingerprintCount()).
|
|
Msg("Fingerprint collection complete - no changes")
|
|
}
|
|
|
|
// Cleanup orphaned data (fingerprints/discoveries for removed resources)
|
|
s.cleanupOrphanedData(state)
|
|
}
|
|
|
|
// cleanupOrphanedData removes fingerprints and discoveries for resources that no longer exist.
|
|
func (s *Service) cleanupOrphanedData(state StateSnapshot) {
|
|
// Safety check: don't clean up if the state appears empty.
|
|
// This prevents catastrophic deletion if state provider has an error
|
|
totalResources := len(state.Containers) + len(state.VMs) + len(state.KubernetesClusters)
|
|
for _, host := range state.DockerHosts {
|
|
totalResources += len(host.Containers)
|
|
}
|
|
if totalResources == 0 {
|
|
log.Debug().Msg("Skipping orphaned data cleanup - state is empty (may be an error)")
|
|
return
|
|
}
|
|
|
|
// Build set of current resource IDs
|
|
currentIDs := make(map[string]bool)
|
|
|
|
// Docker containers
|
|
for _, host := range state.DockerHosts {
|
|
for _, container := range host.Containers {
|
|
fpKey := "docker:" + host.AgentID + ":" + container.Name
|
|
currentIDs[fpKey] = true
|
|
}
|
|
}
|
|
|
|
// LXC containers
|
|
for _, lxc := range state.Containers {
|
|
fpKey := "lxc:" + lxc.Node + ":" + strconv.Itoa(lxc.VMID)
|
|
currentIDs[fpKey] = true
|
|
}
|
|
|
|
// VMs
|
|
for _, vm := range state.VMs {
|
|
fpKey := "vm:" + vm.Node + ":" + strconv.Itoa(vm.VMID)
|
|
currentIDs[fpKey] = true
|
|
}
|
|
|
|
// Kubernetes pods
|
|
for _, cluster := range state.KubernetesClusters {
|
|
for _, pod := range cluster.Pods {
|
|
fpKey := "k8s:" + cluster.ID + ":" + pod.Namespace + "/" + pod.Name
|
|
currentIDs[fpKey] = true
|
|
}
|
|
}
|
|
|
|
// Run cleanup
|
|
fpRemoved := s.store.CleanupOrphanedFingerprints(currentIDs)
|
|
discRemoved := s.store.CleanupOrphanedDiscoveries(currentIDs)
|
|
|
|
if fpRemoved > 0 || discRemoved > 0 {
|
|
log.Info().
|
|
Int("fingerprints_removed", fpRemoved).
|
|
Int("discoveries_removed", discRemoved).
|
|
Msg("Cleaned up orphaned data")
|
|
}
|
|
}
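
// Fingerprint key formats, as built by collectFingerprints and mirrored in
// cleanupOrphanedData above (example values are illustrative):
//
//	docker:<agentID>:<containerName>      e.g. "docker:agent-1:grafana"
//	lxc:<node>:<vmid>                     e.g. "lxc:pve1:105"
//	vm:<node>:<vmid>                      e.g. "vm:pve1:200"
//	k8s:<clusterID>:<namespace>/<pod>     e.g. "k8s:cluster-1:media/frigate-abc"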
|
|
|
|
// discoverDockerContainers runs discovery on Docker containers using metadata.
|
|
// Automatically runs deep scans when the shallow scan results are incomplete or low-confidence.
|
|
func (s *Service) discoverDockerContainers(ctx context.Context, hosts []DockerHost) {
|
|
s.mu.RLock()
|
|
analyzer := s.aiAnalyzer
|
|
s.mu.RUnlock()
|
|
|
|
if analyzer == nil {
|
|
log.Debug().Msg("AI analyzer not set, skipping Docker discovery")
|
|
return
|
|
}
|
|
|
|
for _, host := range hosts {
|
|
for _, container := range host.Containers {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
|
|
// Build resource ID
|
|
id := MakeResourceID(ResourceTypeDocker, host.AgentID, container.Name)
|
|
|
|
// Check if we already have a recent discovery
|
|
if !s.store.NeedsRefresh(id, s.cacheExpiry) {
|
|
continue
|
|
}
|
|
|
|
// Check existing discovery to see if it needs a deep scan
|
|
existing, _ := s.store.Get(id)
|
|
|
|
// Analyze using metadata (shallow discovery)
|
|
discovery := s.analyzeDockerContainer(ctx, analyzer, container, host)
|
|
if discovery != nil {
|
|
// Smart auto deep scan: enhance if discovery is incomplete or low-confidence
|
|
// Also deep scan if there's no existing discovery (first time)
|
|
if s.scanner != nil && (existing == nil || s.needsDeepScan(discovery)) {
|
|
log.Info().
|
|
Str("id", id).
|
|
Float64("confidence", discovery.Confidence).
|
|
Str("serviceType", discovery.ServiceType).
|
|
Bool("firstDiscovery", existing == nil).
|
|
Msg("Auto deep scan triggered due to incomplete discovery")
|
|
discovery = s.enhanceWithDeepScan(ctx, discovery, host)
|
|
}
|
|
|
|
// Suggest web interface URL using Docker host hostname
|
|
discovery.SuggestedURL = SuggestWebURL(discovery, host.Hostname)
|
|
|
|
if err := s.store.Save(discovery); err != nil {
|
|
log.Warn().Err(err).Str("id", id).Msg("Failed to save discovery")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// enhanceWithDeepScan runs a deep scan and merges the results into the discovery.
|
|
func (s *Service) enhanceWithDeepScan(ctx context.Context, discovery *ResourceDiscovery, host DockerHost) *ResourceDiscovery {
|
|
s.mu.RLock()
|
|
timeout := s.deepScanTimeout
|
|
analyzer := s.aiAnalyzer
|
|
s.mu.RUnlock()
|
|
|
|
if s.scanner == nil || analyzer == nil {
|
|
return discovery
|
|
}
|
|
|
|
// Create a timeout context for the deep scan
|
|
scanCtx, cancel := context.WithTimeout(ctx, timeout)
|
|
defer cancel()
|
|
|
|
req := DiscoveryRequest{
|
|
ResourceType: discovery.ResourceType,
|
|
ResourceID: discovery.ResourceID,
|
|
HostID: discovery.HostID,
|
|
Hostname: discovery.Hostname,
|
|
}
|
|
|
|
scanResult, err := s.scanner.Scan(scanCtx, req)
|
|
if err != nil {
|
|
log.Debug().Err(err).Str("id", discovery.ID).Msg("Deep scan failed during background discovery")
|
|
return discovery
|
|
}
|
|
|
|
if len(scanResult.CommandOutputs) == 0 {
|
|
return discovery
|
|
}
|
|
|
|
// Build analysis request with command outputs
|
|
analysisReq := AIAnalysisRequest{
|
|
ResourceType: discovery.ResourceType,
|
|
ResourceID: discovery.ResourceID,
|
|
HostID: discovery.HostID,
|
|
Hostname: discovery.Hostname,
|
|
CommandOutputs: scanResult.CommandOutputs,
|
|
}
|
|
|
|
// Add metadata if available
|
|
if s.stateProvider != nil {
|
|
analysisReq.Metadata = s.getResourceMetadata(req)
|
|
}
|
|
|
|
// Build prompt and analyze
|
|
prompt := s.buildDeepAnalysisPrompt(analysisReq)
|
|
response, err := analyzer.AnalyzeForDiscovery(scanCtx, prompt)
|
|
if err != nil {
|
|
log.Debug().Err(err).Str("id", discovery.ID).Msg("Deep analysis failed during background discovery")
|
|
return discovery
|
|
}
|
|
|
|
result := s.parseAIResponse(response)
|
|
if result == nil {
|
|
return discovery
|
|
}
|
|
|
|
// Merge results - deep scan results take precedence for non-empty fields
|
|
if result.ServiceType != "" && result.ServiceType != "unknown" {
|
|
discovery.ServiceType = result.ServiceType
|
|
}
|
|
if result.ServiceName != "" {
|
|
discovery.ServiceName = result.ServiceName
|
|
}
|
|
if result.ServiceVersion != "" {
|
|
discovery.ServiceVersion = result.ServiceVersion
|
|
}
|
|
if result.Category != "" && result.Category != CategoryUnknown {
|
|
discovery.Category = result.Category
|
|
}
|
|
if result.CLIAccess != "" {
|
|
discovery.CLIAccess = s.formatCLIAccess(discovery.ResourceType, discovery.ResourceID, result.CLIAccess)
|
|
}
|
|
if len(result.Facts) > 0 {
|
|
discovery.Facts = result.Facts
|
|
}
|
|
if len(result.ConfigPaths) > 0 {
|
|
discovery.ConfigPaths = result.ConfigPaths
|
|
}
|
|
if len(result.DataPaths) > 0 {
|
|
discovery.DataPaths = result.DataPaths
|
|
}
|
|
if len(result.LogPaths) > 0 {
|
|
discovery.LogPaths = result.LogPaths
|
|
}
|
|
if len(result.Ports) > 0 {
|
|
discovery.Ports = result.Ports
|
|
}
|
|
if result.Confidence > discovery.Confidence {
|
|
discovery.Confidence = result.Confidence
|
|
}
|
|
if result.Reasoning != "" {
|
|
discovery.AIReasoning = result.Reasoning
|
|
}
|
|
|
|
// Store raw command outputs
|
|
discovery.RawCommandOutput = scanResult.CommandOutputs
|
|
discovery.ScanDuration = scanResult.CompletedAt.Sub(scanResult.StartedAt).Milliseconds()
|
|
discovery.UpdatedAt = time.Now()
|
|
|
|
// Parse docker_mounts if present (for LXCs/VMs running Docker)
|
|
if dockerMountsOutput, ok := scanResult.CommandOutputs["docker_mounts"]; ok {
|
|
discovery.DockerMounts = parseDockerMounts(dockerMountsOutput)
|
|
if len(discovery.DockerMounts) > 0 {
|
|
log.Debug().
|
|
Str("id", discovery.ID).
|
|
Int("mountCount", len(discovery.DockerMounts)).
|
|
Msg("Parsed Docker bind mounts from discovery")
|
|
}
|
|
}
|
|
|
|
log.Info().
|
|
Str("id", discovery.ID).
|
|
Int("commandOutputs", len(scanResult.CommandOutputs)).
|
|
Int("dockerMounts", len(discovery.DockerMounts)).
|
|
Dur("scanDuration", scanResult.CompletedAt.Sub(scanResult.StartedAt)).
|
|
Msg("Enhanced discovery with deep scan")
|
|
|
|
return discovery
|
|
}
|
|
|
|
// analyzeDockerContainer analyzes a Docker container using AI.
|
|
func (s *Service) analyzeDockerContainer(ctx context.Context, analyzer AIAnalyzer, c DockerContainer, host DockerHost) *ResourceDiscovery {
|
|
// Check cache first (per-image timestamp)
|
|
s.cacheMu.RLock()
|
|
entry, found := s.analysisCache[c.Image]
|
|
cacheValid := found && time.Since(entry.cachedAt) < s.cacheExpiry
|
|
s.cacheMu.RUnlock()
|
|
|
|
var result *AIAnalysisResponse
|
|
|
|
if cacheValid {
|
|
result = entry.result
|
|
} else {
|
|
// Build prompt for AI analysis
|
|
prompt := s.buildMetadataAnalysisPrompt(c, host)
|
|
|
|
response, err := analyzer.AnalyzeForDiscovery(ctx, prompt)
|
|
if err != nil {
|
|
log.Warn().Err(err).Str("container", c.Name).Msg("AI analysis failed")
|
|
return nil
|
|
}
|
|
|
|
result = s.parseAIResponse(response)
|
|
if result == nil {
|
|
log.Warn().Str("container", c.Name).Msg("Failed to parse AI response")
|
|
return nil
|
|
}
|
|
|
|
// Cache the result with its own timestamp
|
|
s.cacheMu.Lock()
|
|
s.analysisCache[c.Image] = &analysisCacheEntry{
|
|
result: result,
|
|
cachedAt: time.Now(),
|
|
}
|
|
s.cacheMu.Unlock()
|
|
}
|
|
|
|
// Skip unknown/low-confidence results
|
|
if result.ServiceType == "unknown" || result.Confidence < 0.5 {
|
|
return nil
|
|
}
|
|
|
|
// Build CLI access string
|
|
cliAccess := result.CLIAccess
|
|
if cliAccess != "" {
|
|
cliAccess = strings.ReplaceAll(cliAccess, "{container}", c.Name)
|
|
}
|
|
|
|
// Extract ports
|
|
var ports []PortInfo
|
|
for _, p := range c.Ports {
|
|
ports = append(ports, PortInfo{
|
|
Port: p.PrivatePort,
|
|
Protocol: p.Protocol,
|
|
Address: fmt.Sprintf(":%d", p.PublicPort),
|
|
})
|
|
}
|
|
|
|
return &ResourceDiscovery{
|
|
ID: MakeResourceID(ResourceTypeDocker, host.AgentID, c.Name),
|
|
ResourceType: ResourceTypeDocker,
|
|
ResourceID: c.Name,
|
|
HostID: host.AgentID,
|
|
Hostname: host.Hostname,
|
|
ServiceType: result.ServiceType,
|
|
ServiceName: result.ServiceName,
|
|
ServiceVersion: result.ServiceVersion,
|
|
Category: result.Category,
|
|
CLIAccess: cliAccess,
|
|
Facts: result.Facts,
|
|
ConfigPaths: result.ConfigPaths,
|
|
DataPaths: result.DataPaths,
|
|
LogPaths: result.LogPaths,
|
|
Ports: ports,
|
|
Confidence: result.Confidence,
|
|
AIReasoning: result.Reasoning,
|
|
DiscoveredAt: time.Now(),
|
|
UpdatedAt: time.Now(),
|
|
}
|
|
}
|
|
|
|
// DiscoverResource performs deep discovery on a specific resource.
|
|
// Uses fingerprint-based detection to avoid unnecessary AI calls:
|
|
// - Returns cached discovery if fingerprint hasn't changed
|
|
// - Runs discovery only when fingerprint changed or discovery is too old
|
|
// - Prevents duplicate concurrent discoveries for the same resource
|
|
func (s *Service) DiscoverResource(ctx context.Context, req DiscoveryRequest) (*ResourceDiscovery, error) {
|
|
// Redirect PVE node requests to linked host agent if available
|
|
// This ensures we always scan and store data under the canonical Host Agent ID
|
|
if req.ResourceType == ResourceTypeHost && s.stateProvider != nil {
|
|
state := s.stateProvider.GetState()
|
|
for _, node := range state.Nodes {
|
|
// Check if the requested ID matches the Node Name or ID
|
|
if node.Name == req.HostID || node.Name == req.ResourceID || node.ID == req.ResourceID {
|
|
if node.LinkedHostAgentID != "" {
|
|
log.Info().
|
|
Str("from_host", req.HostID).
|
|
Str("to_agent", node.LinkedHostAgentID).
|
|
Msg("Redirecting discovery scan to linked host agent")
|
|
req.HostID = node.LinkedHostAgentID
|
|
req.ResourceID = node.LinkedHostAgentID
|
|
}
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
resourceID := MakeResourceID(req.ResourceType, req.HostID, req.ResourceID)
|
|
|
|
// Get current fingerprint (if available)
|
|
// Fingerprint key matches the resource ID format: type:host:id
|
|
currentFP, _ := s.store.GetFingerprint(resourceID)
|
|
|
|
// Get existing discovery
|
|
existing, _ := s.store.Get(resourceID)
|
|
|
|
// Determine if we need to run discovery
|
|
needsDiscovery := false
|
|
reason := ""
|
|
|
|
if req.Force {
|
|
needsDiscovery = true
|
|
reason = "forced"
|
|
} else if existing == nil {
|
|
needsDiscovery = true
|
|
reason = "no existing discovery"
|
|
} else if currentFP != nil && existing.Fingerprint != currentFP.Hash {
|
|
// Fingerprint hash differs - check if it's just a schema version change
|
|
if existing.FingerprintSchemaVersion != 0 && existing.FingerprintSchemaVersion != currentFP.SchemaVersion {
|
|
// Schema changed but the resource itself didn't - don't trigger rediscovery
|
|
// This prevents mass rediscovery when we upgrade the fingerprint algorithm
|
|
log.Debug().
|
|
Str("id", resourceID).
|
|
Int("old_schema", existing.FingerprintSchemaVersion).
|
|
Int("new_schema", currentFP.SchemaVersion).
|
|
Msg("Fingerprint schema changed, but not triggering rediscovery")
|
|
} else {
|
|
// Same schema version, different hash = real container change
|
|
needsDiscovery = true
|
|
reason = "fingerprint changed"
|
|
}
|
|
} else if time.Since(existing.DiscoveredAt) > s.maxDiscoveryAge {
|
|
needsDiscovery = true
|
|
reason = "discovery too old"
|
|
}
|
|
|
|
// Return cached discovery if still valid
|
|
if !needsDiscovery && existing != nil {
|
|
log.Debug().Str("id", resourceID).Msg("Discovery still valid, returning cached")
|
|
return existing, nil
|
|
}
|
|
|
|
// Check for duplicate concurrent discovery requests
|
|
s.inProgressMu.Lock()
|
|
if inProg, ok := s.inProgress[resourceID]; ok {
|
|
// Discovery already in progress - wait for it
|
|
s.inProgressMu.Unlock()
|
|
log.Debug().Str("id", resourceID).Msg("Discovery already in progress, waiting for result")
|
|
|
|
select {
|
|
case <-inProg.done:
|
|
return inProg.result, inProg.err
|
|
case <-ctx.Done():
|
|
return nil, ctx.Err()
|
|
}
|
|
}
|
|
|
|
// Claim this discovery slot
|
|
inProg := &discoveryInProgress{
|
|
done: make(chan struct{}),
|
|
}
|
|
s.inProgress[resourceID] = inProg
|
|
s.inProgressMu.Unlock()
|
|
|
|
// Ensure we clean up and notify waiters when done
|
|
defer func() {
|
|
close(inProg.done)
|
|
s.inProgressMu.Lock()
|
|
delete(s.inProgress, resourceID)
|
|
s.inProgressMu.Unlock()
|
|
}()
|
|
|
|
log.Info().Str("id", resourceID).Str("reason", reason).Msg("Running discovery")
|
|
|
|
s.mu.RLock()
|
|
analyzer := s.aiAnalyzer
|
|
s.mu.RUnlock()
|
|
|
|
if analyzer == nil {
|
|
inProg.err = fmt.Errorf("AI analyzer not configured")
|
|
return nil, inProg.err
|
|
}
|
|
|
|
// Run deep scan if scanner is available
|
|
var scanResult *ScanResult
|
|
var scanError error
|
|
if s.scanner != nil {
|
|
scanResult, scanError = s.scanner.Scan(ctx, req)
|
|
if scanError != nil {
|
|
log.Warn().
|
|
Err(scanError).
|
|
Str("id", resourceID).
|
|
Str("resource_type", string(req.ResourceType)).
|
|
Msg("Deep scan failed, falling back to metadata-only analysis. For full discovery, ensure the host agent is connected with commands enabled.")
|
|
}
|
|
}
|
|
|
|
// Build analysis request
|
|
analysisReq := AIAnalysisRequest{
|
|
ResourceType: req.ResourceType,
|
|
ResourceID: req.ResourceID,
|
|
HostID: req.HostID,
|
|
Hostname: req.Hostname,
|
|
}
|
|
|
|
if scanResult != nil {
|
|
analysisReq.CommandOutputs = scanResult.CommandOutputs
|
|
}
|
|
|
|
// Add metadata if available
|
|
if s.stateProvider != nil {
|
|
analysisReq.Metadata = s.getResourceMetadata(req)
|
|
}
|
|
|
|
// Build prompt and analyze
|
|
prompt := s.buildDeepAnalysisPrompt(analysisReq)
|
|
|
|
// Broadcast progress: AI analysis starting
|
|
s.broadcastProgress(&DiscoveryProgress{
|
|
ResourceID: resourceID,
|
|
Status: DiscoveryStatusRunning,
|
|
CurrentStep: "Analyzing with Pulse Assistant...",
|
|
})
|
|
|
|
response, err := analyzer.AnalyzeForDiscovery(ctx, prompt)
|
|
if err != nil {
|
|
inProg.err = fmt.Errorf("AI analysis failed: %w", err)
|
|
return nil, inProg.err
|
|
}
|
|
|
|
result := s.parseAIResponse(response)
|
|
if result == nil {
|
|
// Truncate response for error message
|
|
truncated := response
|
|
if len(truncated) > 500 {
|
|
truncated = truncated[:500] + "..."
|
|
}
|
|
inProg.err = fmt.Errorf("failed to parse AI response: %s", truncated)
|
|
return nil, inProg.err
|
|
}
|
|
|
|
// Resolve hostname from metadata if not provided in request
|
|
hostname := req.Hostname
|
|
if hostname == "" && analysisReq.Metadata != nil {
|
|
if name, ok := analysisReq.Metadata["name"].(string); ok && name != "" {
|
|
hostname = name
|
|
}
|
|
}
|
|
|
|
// Build discovery result
|
|
discovery := &ResourceDiscovery{
|
|
ID: resourceID,
|
|
ResourceType: req.ResourceType,
|
|
ResourceID: req.ResourceID,
|
|
HostID: req.HostID,
|
|
Hostname: hostname,
|
|
ServiceType: result.ServiceType,
|
|
ServiceName: result.ServiceName,
|
|
ServiceVersion: result.ServiceVersion,
|
|
Category: result.Category,
|
|
CLIAccess: s.formatCLIAccess(req.ResourceType, req.ResourceID, result.CLIAccess),
|
|
CLIAccessVersion: CLIAccessVersion,
|
|
Facts: result.Facts,
|
|
ConfigPaths: result.ConfigPaths,
|
|
DataPaths: result.DataPaths,
|
|
LogPaths: result.LogPaths,
|
|
Ports: result.Ports,
|
|
Confidence: result.Confidence,
|
|
AIReasoning: result.Reasoning,
|
|
DiscoveredAt: time.Now(),
|
|
UpdatedAt: time.Now(),
|
|
}
|
|
|
|
// Store fingerprint with discovery
|
|
if currentFP != nil {
|
|
discovery.Fingerprint = currentFP.Hash
|
|
discovery.FingerprintedAt = currentFP.GeneratedAt
|
|
discovery.FingerprintSchemaVersion = currentFP.SchemaVersion
|
|
}
|
|
|
|
if scanResult != nil {
|
|
discovery.RawCommandOutput = scanResult.CommandOutputs
|
|
discovery.ScanDuration = scanResult.CompletedAt.Sub(scanResult.StartedAt).Milliseconds()
|
|
|
|
// Parse docker_mounts if present (for LXCs/VMs running Docker)
|
|
if dockerMountsOutput, ok := scanResult.CommandOutputs["docker_mounts"]; ok {
|
|
discovery.DockerMounts = parseDockerMounts(dockerMountsOutput)
|
|
if len(discovery.DockerMounts) > 0 {
|
|
log.Debug().
|
|
Str("id", discovery.ID).
|
|
Int("mountCount", len(discovery.DockerMounts)).
|
|
Msg("Parsed Docker bind mounts from on-demand discovery")
|
|
}
|
|
}
|
|
} else if scanError != nil {
|
|
// Add note to reasoning when we couldn't run commands
|
|
metadataNote := "[Note: Discovery was limited to metadata-only analysis because command execution was unavailable. "
|
|
if strings.Contains(scanError.Error(), "no connected agent") {
|
|
metadataNote += "To enable full discovery with command execution, ensure the host agent has 'Pulse Commands' enabled in Settings → Unified Agents.]"
|
|
} else {
|
|
metadataNote += "Error: " + scanError.Error() + "]"
|
|
}
|
|
if discovery.AIReasoning != "" {
|
|
discovery.AIReasoning = metadataNote + " " + discovery.AIReasoning
|
|
} else {
|
|
discovery.AIReasoning = metadataNote
|
|
}
|
|
}
|
|
|
|
// Preserve user notes from existing discovery
|
|
if existing != nil {
|
|
discovery.UserNotes = existing.UserNotes
|
|
discovery.UserSecrets = existing.UserSecrets
|
|
if discovery.DiscoveredAt.IsZero() || existing.DiscoveredAt.Before(discovery.DiscoveredAt) {
|
|
discovery.DiscoveredAt = existing.DiscoveredAt
|
|
}
|
|
}
|
|
|
|
// Suggest web interface URL based on service type and external IP
|
|
if s.stateProvider != nil {
|
|
if externalIP := s.getResourceExternalIP(req); externalIP != "" {
|
|
discovery.SuggestedURL = SuggestWebURL(discovery, externalIP)
|
|
}
|
|
}
|
|
|
|
// Broadcast progress: Discovery complete
|
|
s.broadcastProgress(&DiscoveryProgress{
|
|
ResourceID: resourceID,
|
|
Status: DiscoveryStatusCompleted,
|
|
CurrentStep: "Discovery complete",
|
|
PercentComplete: 100,
|
|
})
|
|
|
|
// Save discovery
|
|
if err := s.store.Save(discovery); err != nil {
|
|
inProg.err = fmt.Errorf("failed to save discovery: %w", err)
|
|
return nil, inProg.err
|
|
}
|
|
|
|
// Store result for any waiting goroutines
|
|
inProg.result = discovery
|
|
return discovery, nil
|
|
}
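
// Illustrative call sketch (not part of the original source): requesting an
// on-demand discovery for an LXC. The IDs below are hypothetical.
//
//	disc, err := svc.DiscoverResource(ctx, DiscoveryRequest{
//		ResourceType: ResourceTypeLXC,
//		ResourceID:   "105",
//		HostID:       "pve1",
//		Hostname:     "pve1",
//		Force:        false, // set to true to bypass the fingerprint/age checks
//	})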
|
|
|
|
// getResourceMetadata retrieves metadata for a resource from the state.
|
|
func (s *Service) getResourceMetadata(req DiscoveryRequest) map[string]any {
|
|
if s.stateProvider == nil {
|
|
return nil
|
|
}
|
|
|
|
state := s.stateProvider.GetState()
|
|
metadata := make(map[string]any)
|
|
|
|
switch req.ResourceType {
|
|
case ResourceTypeLXC:
|
|
for _, c := range state.Containers {
|
|
if fmt.Sprintf("%d", c.VMID) == req.ResourceID && c.Node == req.HostID {
|
|
metadata["name"] = c.Name
|
|
metadata["status"] = c.Status
|
|
metadata["vmid"] = c.VMID
|
|
break
|
|
}
|
|
}
|
|
case ResourceTypeVM:
|
|
for _, vm := range state.VMs {
|
|
if fmt.Sprintf("%d", vm.VMID) == req.ResourceID && vm.Node == req.HostID {
|
|
metadata["name"] = vm.Name
|
|
metadata["status"] = vm.Status
|
|
metadata["vmid"] = vm.VMID
|
|
break
|
|
}
|
|
}
|
|
case ResourceTypeDocker:
|
|
for _, host := range state.DockerHosts {
|
|
if host.AgentID == req.HostID || host.Hostname == req.HostID {
|
|
for _, c := range host.Containers {
|
|
if c.Name == req.ResourceID {
|
|
metadata["image"] = c.Image
|
|
metadata["status"] = c.Status
|
|
// Filter sensitive labels before sending to AI
|
|
metadata["labels"] = filterSensitiveLabels(c.Labels)
|
|
break
|
|
}
|
|
}
|
|
break
|
|
}
|
|
}
|
|
case ResourceTypeHost:
|
|
for _, host := range state.Hosts {
|
|
if host.ID == req.ResourceID || host.Hostname == req.ResourceID || host.ID == req.HostID {
|
|
metadata["hostname"] = host.Hostname
|
|
metadata["display_name"] = host.DisplayName
|
|
metadata["platform"] = host.Platform
|
|
metadata["os_name"] = host.OSName
|
|
metadata["os_version"] = host.OSVersion
|
|
metadata["kernel_version"] = host.KernelVersion
|
|
metadata["architecture"] = host.Architecture
|
|
metadata["cpu_count"] = host.CPUCount
|
|
metadata["status"] = host.Status
|
|
if len(host.Tags) > 0 {
|
|
metadata["tags"] = host.Tags
|
|
}
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
return metadata
|
|
}
|
|
|
|
// getResourceExternalIP retrieves the external IP address for a resource from the state.
|
|
// For LXC/VM, this is the first IP from the Proxmox guest agent.
|
|
// For Docker containers, this is the Docker host's IP/hostname.
|
|
func (s *Service) getResourceExternalIP(req DiscoveryRequest) string {
|
|
if s.stateProvider == nil {
|
|
return ""
|
|
}
|
|
|
|
state := s.stateProvider.GetState()
|
|
|
|
switch req.ResourceType {
|
|
case ResourceTypeLXC:
|
|
for _, c := range state.Containers {
|
|
if fmt.Sprintf("%d", c.VMID) == req.ResourceID && c.Node == req.HostID {
|
|
if len(c.IPAddresses) > 0 {
|
|
return c.IPAddresses[0]
|
|
}
|
|
return ""
|
|
}
|
|
}
|
|
case ResourceTypeVM:
|
|
for _, vm := range state.VMs {
|
|
if fmt.Sprintf("%d", vm.VMID) == req.ResourceID && vm.Node == req.HostID {
|
|
if len(vm.IPAddresses) > 0 {
|
|
return vm.IPAddresses[0]
|
|
}
|
|
return ""
|
|
}
|
|
}
|
|
case ResourceTypeDocker:
|
|
// For Docker containers, use the Docker host's hostname/IP
|
|
for _, host := range state.DockerHosts {
|
|
if host.AgentID == req.HostID || host.Hostname == req.HostID {
|
|
// The host's Hostname field may hold either an IP address or a DNS name; both work for URL suggestions
|
|
return host.Hostname
|
|
}
|
|
}
|
|
case ResourceTypeDockerVM, ResourceTypeDockerLXC:
|
|
// For Docker containers inside VMs/LXCs, find the VM/LXC's IP
|
|
// The hostID contains the parent resource info
|
|
for _, vm := range state.VMs {
|
|
if fmt.Sprintf("%d", vm.VMID) == req.HostID || vm.Name == req.HostID {
|
|
if len(vm.IPAddresses) > 0 {
|
|
return vm.IPAddresses[0]
|
|
}
|
|
}
|
|
}
|
|
for _, c := range state.Containers {
|
|
if fmt.Sprintf("%d", c.VMID) == req.HostID || c.Name == req.HostID {
|
|
if len(c.IPAddresses) > 0 {
|
|
return c.IPAddresses[0]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
// formatCLIAccess formats the CLI access string with actual values.
|
|
func (s *Service) formatCLIAccess(resourceType ResourceType, resourceID, cliTemplate string) string {
|
|
if cliTemplate == "" {
|
|
// Use default template
|
|
cliTemplate = GetCLIAccessTemplate(resourceType)
|
|
}
|
|
|
|
result := cliTemplate
|
|
result = strings.ReplaceAll(result, "{vmid}", resourceID)
|
|
result = strings.ReplaceAll(result, "{container}", resourceID)
|
|
result = strings.ReplaceAll(result, "{command}", "...")
|
|
|
|
return result
|
|
}
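
// Illustrative substitution sketch (not part of the original source); the
// template string is supplied by the caller here, so nothing is assumed about
// GetCLIAccessTemplate's defaults.
//
//	s.formatCLIAccess(ResourceTypeLXC, "105", "pct exec {vmid} -- {command}")
//	// -> "pct exec 105 -- ..."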
|
|
|
|
// buildMetadataAnalysisPrompt builds a prompt for shallow metadata-based analysis.
|
|
func (s *Service) buildMetadataAnalysisPrompt(c DockerContainer, host DockerHost) string {
|
|
info := map[string]any{
|
|
"name": c.Name,
|
|
"image": c.Image,
|
|
"status": c.Status,
|
|
"host": host.Hostname,
|
|
}
|
|
|
|
if len(c.Ports) > 0 {
|
|
var ports []map[string]any
|
|
for _, p := range c.Ports {
|
|
ports = append(ports, map[string]any{
|
|
"public": p.PublicPort,
|
|
"private": p.PrivatePort,
|
|
"protocol": p.Protocol,
|
|
})
|
|
}
|
|
info["ports"] = ports
|
|
}
|
|
|
|
if len(c.Labels) > 0 {
|
|
// Filter sensitive labels before sending to AI
|
|
info["labels"] = filterSensitiveLabels(c.Labels)
|
|
}
|
|
|
|
if len(c.Mounts) > 0 {
|
|
var mounts []string
|
|
for _, m := range c.Mounts {
|
|
mounts = append(mounts, m.Destination)
|
|
}
|
|
info["mounts"] = mounts
|
|
}
|
|
|
|
infoJSON, _ := json.MarshalIndent(info, "", " ")
|
|
|
|
return fmt.Sprintf(`Analyze this Docker container and identify what service it's running.
|
|
|
|
Container Information:
|
|
%s
|
|
|
|
Based on the image name, ports, labels, and mounts, determine:
|
|
1. What service/application is this?
|
|
2. What category does it belong to?
|
|
3. How should CLI commands be executed?
|
|
|
|
Respond in this exact JSON format:
|
|
{
|
|
"service_type": "lowercase_type",
|
|
"service_name": "Human Readable Name",
|
|
"service_version": "version if detectable from image tag",
|
|
"category": "database|web_server|cache|monitoring|backup|nvr|storage|container|network|security|media|home_automation|unknown",
|
|
"cli_access": "docker exec {container} <cli-tool>",
|
|
"facts": [],
|
|
"config_paths": [],
|
|
"data_paths": [],
|
|
"log_paths": [],
|
|
"ports": [],
|
|
"confidence": 0.0-1.0,
|
|
"reasoning": "Brief explanation"
|
|
}
|
|
|
|
Respond with ONLY valid JSON.`, string(infoJSON))
|
|
}
|
|
|
|
// buildDeepAnalysisPrompt builds a prompt for deep analysis with command outputs.
|
|
func (s *Service) buildDeepAnalysisPrompt(req AIAnalysisRequest) string {
|
|
var sections []string
|
|
|
|
sections = append(sections, fmt.Sprintf(`Resource Type: %s
|
|
Resource ID: %s
|
|
Host: %s (%s)`, req.ResourceType, req.ResourceID, req.Hostname, req.HostID))
|
|
|
|
if len(req.Metadata) > 0 {
|
|
metaJSON, _ := json.MarshalIndent(req.Metadata, "", " ")
|
|
sections = append(sections, fmt.Sprintf("Metadata:\n%s", string(metaJSON)))
|
|
}
|
|
|
|
if len(req.CommandOutputs) > 0 {
|
|
sections = append(sections, "Command Outputs:")
|
|
for name, output := range req.CommandOutputs {
|
|
// Truncate long outputs
|
|
if len(output) > 2000 {
|
|
output = output[:2000] + "\n... (truncated)"
|
|
}
|
|
sections = append(sections, fmt.Sprintf("--- %s ---\n%s", name, output))
|
|
}
|
|
}
|
|
|
|
// Use different prompts for HOST vs other resource types
|
|
if req.ResourceType == ResourceTypeHost {
|
|
return fmt.Sprintf(`Analyze this HOST system and provide detailed discovery information.
|
|
|
|
%s
|
|
|
|
IMPORTANT: This is a HOST discovery. Focus on identifying the HOST OPERATING SYSTEM and its primary role/purpose, NOT individual services or containers running on it.
|
|
|
|
Based on all available information, determine:
|
|
1. What is the host operating system? (e.g., Unraid, Proxmox, Ubuntu Server, Debian, TrueNAS)
|
|
2. What is the OS version?
|
|
3. What is the primary role/purpose of this host? (e.g., NAS, hypervisor, media server, backup server)
|
|
4. What are the key system paths?
|
|
5. What storage is available?
|
|
6. What services are running? (list as facts, not as the primary identification)
|
|
|
|
Respond in this exact JSON format:
|
|
{
|
|
"service_type": "lowercase_os_type (e.g., unraid, proxmox, ubuntu, debian, truenas)",
|
|
"service_name": "Human Readable OS Name and Role (e.g., Unraid NAS Server, Proxmox VE Hypervisor)",
|
|
"service_version": "OS version number",
|
|
"category": "storage|virtualizer|container|network|unknown",
|
|
"cli_access": "ssh user@hostname",
|
|
"facts": [
|
|
{"category": "version|config|service|port|hardware|network|storage|dependency|security", "key": "fact_name", "value": "fact_value", "source": "command_name", "confidence": 0.9}
|
|
],
|
|
"config_paths": ["/etc/", "/boot/config/"],
|
|
"data_paths": ["/mnt/data", "/storage"],
|
|
"log_paths": ["/var/log/"],
|
|
"ports": [{"port": 22, "protocol": "tcp", "process": "sshd", "address": "0.0.0.0"}],
|
|
"confidence": 0.0-1.0,
|
|
"reasoning": "Explanation of host identification"
|
|
}
|
|
|
|
Important:
|
|
- The service_type and service_name MUST reflect the HOST OS, not services running on it
|
|
- List Docker containers, VMs, or other services as facts with category "service"
|
|
- Include storage information (disks, pools, arrays) as facts with category "storage"
|
|
- Include hardware info (CPU, RAM) as facts with category "hardware"
|
|
|
|
Respond with ONLY valid JSON.`, strings.Join(sections, "\n\n"))
|
|
}
|
|
|
|
return fmt.Sprintf(`Analyze this infrastructure resource and provide detailed discovery information.
|
|
|
|
%s
|
|
|
|
Based on all available information, determine:
|
|
1. What service/application is running?
|
|
2. What version is it?
|
|
3. What are the important configuration paths?
|
|
4. What data paths should be backed up?
|
|
5. What log paths are useful for troubleshooting?
|
|
6. What ports are in use?
|
|
7. Any special hardware (GPU, TPU, etc.)?
|
|
8. Any dependencies (databases, message queues, etc.)?
|
|
|
|
Respond in this exact JSON format:
|
|
{
|
|
"service_type": "lowercase_type (e.g., frigate, postgres, pbs)",
|
|
"service_name": "Human Readable Name",
|
|
"service_version": "version number if found",
|
|
"category": "database|web_server|cache|monitoring|backup|nvr|storage|container|virtualizer|network|security|media|home_automation|unknown",
|
|
"cli_access": "command to access this service's CLI",
|
|
"facts": [
|
|
{"category": "version|config|service|port|hardware|network|storage|dependency|security", "key": "fact_name", "value": "fact_value", "source": "command_name", "confidence": 0.9}
|
|
],
|
|
"config_paths": ["/path/to/config.yml"],
|
|
"data_paths": ["/path/to/data"],
|
|
"log_paths": ["/var/log/service/", "/path/to/app.log"],
|
|
"ports": [{"port": 8080, "protocol": "tcp", "process": "nginx", "address": "0.0.0.0"}],
|
|
"confidence": 0.0-1.0,
|
|
"reasoning": "Explanation of identification"
|
|
}
|
|
|
|
Important:
|
|
- Extract version numbers from package lists, process output, or config files
|
|
- Identify config and data paths from mount points and file listings
|
|
- Identify log paths (e.g., /var/log/, application-specific logs) for troubleshooting
|
|
- Note any special hardware like Coral TPU, NVIDIA GPU
|
|
- For LXC/VM, the CLI access should use pct exec or qm guest exec
|
|
- For Docker, use docker exec
|
|
|
|
Respond with ONLY valid JSON.`, strings.Join(sections, "\n\n"))
|
|
}
|
|
|
|
// parseAIResponse parses the AI's JSON response.
|
|
func (s *Service) parseAIResponse(response string) *AIAnalysisResponse {
|
|
log.Debug().Str("raw_response", response).Msg("Discovery raw response")
|
|
response = strings.TrimSpace(response)
|
|
|
|
// Handle markdown code blocks
|
|
if strings.HasPrefix(response, "```") {
|
|
lines := strings.Split(response, "\n")
|
|
var jsonLines []string
|
|
inBlock := false
|
|
for _, line := range lines {
|
|
if strings.HasPrefix(line, "```") {
|
|
inBlock = !inBlock
|
|
continue
|
|
}
|
|
if inBlock {
|
|
jsonLines = append(jsonLines, line)
|
|
}
|
|
}
|
|
response = strings.Join(jsonLines, "\n")
|
|
}
|
|
|
|
// Find JSON object
|
|
start := strings.Index(response, "{")
|
|
end := strings.LastIndex(response, "}")
|
|
if start >= 0 && end > start {
|
|
response = response[start : end+1]
|
|
}
|
|
|
|
var result AIAnalysisResponse
|
|
if err := json.Unmarshal([]byte(response), &result); err != nil {
|
|
log.Debug().Err(err).Str("response", response).Msg("Failed to parse AI response")
|
|
return nil
|
|
}
|
|
|
|
// Set discovered_at for facts
|
|
now := time.Now()
|
|
for i := range result.Facts {
|
|
result.Facts[i].DiscoveredAt = now
|
|
}
|
|
|
|
return &result
|
|
}
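
// Illustrative parse sketch (not part of the original source): markdown fences
// are stripped and only the outermost JSON object is unmarshalled. The field
// mapping is assumed from the JSON format requested in the prompts above.
//
//	raw := "```json\n{\"service_type\": \"postgres\", \"confidence\": 0.9}\n```"
//	result := s.parseAIResponse(raw)
//	// result is non-nil; any facts it contained would get DiscoveredAt stamped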
|
|
|
|
// parseDockerMounts parses the docker_mounts command output into a slice of DockerBindMount.
// The output format is:
// CONTAINER:container_name
// source|destination|type
// source|destination|type
// CONTAINER:another_container
// source|destination|type
func parseDockerMounts(output string) []DockerBindMount {
	if output == "" || output == "no_docker_mounts" {
		return nil
	}

	var mounts []DockerBindMount
	var currentContainer string

	lines := strings.Split(output, "\n")
	for _, line := range lines {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}

		// Check if this is a container header
		if strings.HasPrefix(line, "CONTAINER:") {
			currentContainer = strings.TrimPrefix(line, "CONTAINER:")
			continue
		}

		// Skip if we don't have a current container
		if currentContainer == "" {
			continue
		}

		// Parse mount line: source|destination|type
		parts := strings.Split(line, "|")
		if len(parts) < 2 {
			continue
		}

		mount := DockerBindMount{
			ContainerName: currentContainer,
			Source:        parts[0],
			Destination:   parts[1],
		}
		if len(parts) >= 3 {
			mount.Type = parts[2]
		}

		// Only include bind mounts and volumes (skip tmpfs, etc.)
		if mount.Type == "" || mount.Type == "bind" || mount.Type == "volume" {
			mounts = append(mounts, mount)
		}
	}

	return mounts
}

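// Usage sketch (illustrative only; the container name and paths below are
// hypothetical, not taken from real agent output):
//
//	out := "CONTAINER:frigate\n" +
//		"/opt/frigate/config|/config|bind\n" +
//		"tmpfs|/tmp/cache|tmpfs"
//	mounts := parseDockerMounts(out)
//	// mounts contains a single DockerBindMount{ContainerName: "frigate",
//	// Source: "/opt/frigate/config", Destination: "/config", Type: "bind"};
//	// the tmpfs entry is filtered out.
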
// GetDiscovery retrieves a discovery by ID.
func (s *Service) GetDiscovery(id string) (*ResourceDiscovery, error) {
	d, err := s.store.Get(id)
	if err != nil || d == nil {
		return d, err
	}
	s.upgradeCLIAccessIfNeeded(d)
	return d, nil
}

// GetDiscoveryByResource retrieves a discovery by resource type, host ID, and resource ID.
func (s *Service) GetDiscoveryByResource(resourceType ResourceType, hostID, resourceID string) (*ResourceDiscovery, error) {
	originalHostID := hostID
	originalResourceID := resourceID
	redirected := false

	// Redirect PVE node lookups to the linked host agent if available.
	// This ensures UI components that look up a PVE node by name (e.g. NodeDrawer)
	// get the data associated with the host agent.
	if resourceType == ResourceTypeHost && s.stateProvider != nil {
		state := s.stateProvider.GetState()
		for _, node := range state.Nodes {
			if node.Name == hostID || node.Name == resourceID || node.ID == resourceID {
				if node.LinkedHostAgentID != "" {
					log.Debug().
						Str("from_host", hostID).
						Str("to_agent", node.LinkedHostAgentID).
						Msg("Redirecting discovery lookup to linked host agent")
					hostID = node.LinkedHostAgentID
					resourceID = node.LinkedHostAgentID
					redirected = true
				}
				break
			}
		}
	}

	d, err := s.store.GetByResource(resourceType, hostID, resourceID)
	// If redirected and not found, try the original ID (fallback for unmigrated data)
	if (err != nil || d == nil) && redirected {
		log.Debug().
			Str("redirected_host", hostID).
			Str("original_host", originalHostID).
			Msg("Redirected lookup failed, trying fallback to original ID")
		dOriginal, errOriginal := s.store.GetByResource(resourceType, originalHostID, originalResourceID)
		if errOriginal == nil && dOriginal != nil {
			log.Debug().
				Str("original_host", originalHostID).
				Msg("Fallback lookup succeeded - returning legacy discovery")
			s.upgradeCLIAccessIfNeeded(dOriginal)
			return dOriginal, nil
		}
	}

	if err != nil || d == nil {
		return d, err
	}
	s.upgradeCLIAccessIfNeeded(d)
	return d, nil
}

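// Redirect sketch (illustrative; the node name and agent ID are hypothetical):
// if a PVE node named "pve1" is linked to host agent "agent-abc", a UI lookup
// by node name is transparently served from the agent's discovery record.
//
//	d, err := s.GetDiscoveryByResource(ResourceTypeHost, "pve1", "pve1")
//	// Internally the lookup is rewritten to ("agent-abc", "agent-abc"); if no
//	// record exists under the agent ID, the original ("pve1", "pve1") key is
//	// tried as a fallback for data written before the migration.
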
// ListDiscoveries returns all discoveries.
func (s *Service) ListDiscoveries() ([]*ResourceDiscovery, error) {
	discoveries, err := s.store.List()
	if err != nil {
		return nil, err
	}
	discoveries = s.deduplicateDiscoveries(discoveries)
	for _, d := range discoveries {
		s.upgradeCLIAccessIfNeeded(d)
	}
	return discoveries, nil
}

// ListDiscoveriesByType returns discoveries for a specific resource type.
func (s *Service) ListDiscoveriesByType(resourceType ResourceType) ([]*ResourceDiscovery, error) {
	discoveries, err := s.store.ListByType(resourceType)
	if err != nil {
		return nil, err
	}
	discoveries = s.deduplicateDiscoveries(discoveries)
	for _, d := range discoveries {
		s.upgradeCLIAccessIfNeeded(d)
	}
	return discoveries, nil
}

// ListDiscoveriesByHost returns discoveries for a specific host.
func (s *Service) ListDiscoveriesByHost(hostID string) ([]*ResourceDiscovery, error) {
	discoveries, err := s.store.ListByHost(hostID)
	if err != nil {
		return nil, err
	}
	discoveries = s.deduplicateDiscoveries(discoveries)
	for _, d := range discoveries {
		s.upgradeCLIAccessIfNeeded(d)
	}
	return discoveries, nil
}

// deduplicateDiscoveries filters out redundant discoveries where a PVE node
// is represented by both its node name and its linked host agent ID.
// The host agent ID is preferred.
func (s *Service) deduplicateDiscoveries(discoveries []*ResourceDiscovery) []*ResourceDiscovery {
	if s.stateProvider == nil {
		return discoveries
	}

	state := s.stateProvider.GetState()
	if len(state.Nodes) == 0 {
		return discoveries
	}

	// Map linked agent IDs to their PVE node source(s): AgentID -> NodeName
	linkedAgents := make(map[string]string)
	for _, node := range state.Nodes {
		if node.LinkedHostAgentID != "" {
			linkedAgents[node.LinkedHostAgentID] = node.Name
		}
	}

	if len(linkedAgents) == 0 {
		return discoveries
	}

	// Check which agents actually have discovery data
	hasAgentDiscovery := make(map[string]bool)
	for _, d := range discoveries {
		if d.ResourceType == ResourceTypeHost {
			// d.HostID is usually the agent ID for host resources
			if _, ok := linkedAgents[d.HostID]; ok {
				hasAgentDiscovery[d.HostID] = true
			}
		}
	}

	// Filter out PVE node discoveries if the corresponding agent discovery exists
	filtered := make([]*ResourceDiscovery, 0, len(discoveries))
	for _, d := range discoveries {
		if d.ResourceType == ResourceTypeHost {
			// If this discovery is keyed by a PVE node (name/ID), check whether it
			// maps to an agent that already has a discovery in this list.
			isPVENode := false
			var linkedAgentID string

			for _, node := range state.Nodes {
				if d.HostID == node.Name || d.HostID == node.ID || d.ResourceID == node.Name {
					isPVENode = true
					linkedAgentID = node.LinkedHostAgentID
					break
				}
			}

			if isPVENode && linkedAgentID != "" && hasAgentDiscovery[linkedAgentID] && d.HostID != linkedAgentID {
				// We have the agent discovery, so skip this redundant PVE node discovery
				continue
			}
		}
		filtered = append(filtered, d)
	}

	return filtered
}

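// Dedup sketch (illustrative; names are hypothetical): with a node "pve1"
// linked to agent "agent-abc", and host discoveries stored under both IDs,
// deduplicateDiscoveries keeps only the agent-keyed record.
//
//	list := []*ResourceDiscovery{
//		{ResourceType: ResourceTypeHost, HostID: "pve1", ResourceID: "pve1"},
//		{ResourceType: ResourceTypeHost, HostID: "agent-abc", ResourceID: "agent-abc"},
//	}
//	list = s.deduplicateDiscoveries(list)
//	// Only the "agent-abc" entry remains; non-host discoveries are never dropped.
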
// upgradeCLIAccessIfNeeded upgrades cached discovery fields to current versions.
// This ensures cached discoveries get the new instructional CLI access format
// and have hostname populated without requiring a full re-discovery.
func (s *Service) upgradeCLIAccessIfNeeded(d *ResourceDiscovery) {
	if d == nil {
		return
	}

	upgraded := false

	// Upgrade CLI access if version is outdated
	if d.CLIAccessVersion < CLIAccessVersion {
		oldCLI := d.CLIAccess
		d.CLIAccess = GetCLIAccessTemplate(d.ResourceType)
		d.CLIAccessVersion = CLIAccessVersion
		upgraded = true

		log.Debug().
			Str("id", d.ID).
			Str("old_cli", oldCLI).
			Str("new_cli", d.CLIAccess).
			Int("new_version", CLIAccessVersion).
			Msg("Upgraded CLI access pattern to new version")
	}

	// Fix empty hostname by looking up the resource name from state
	if d.Hostname == "" && s.stateProvider != nil {
		state := s.stateProvider.GetState()
		hostname := s.lookupHostnameFromState(d.ResourceType, d.HostID, d.ResourceID, state)
		if hostname != "" {
			d.Hostname = hostname
			upgraded = true
			log.Debug().
				Str("id", d.ID).
				Str("hostname", hostname).
				Msg("Populated missing hostname from state")
		}
	}

	_ = upgraded // upgraded is only written, never read; blank-assign so the compiler does not flag it as unused
}

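// Upgrade sketch (illustrative; the cached record is hypothetical): a record
// written at an older CLI access version is rewritten in place when read.
//
//	d := &ResourceDiscovery{ResourceType: ResourceTypeDocker, CLIAccessVersion: CLIAccessVersion - 1}
//	s.upgradeCLIAccessIfNeeded(d)
//	// d.CLIAccess now holds GetCLIAccessTemplate(ResourceTypeDocker) and
//	// d.CLIAccessVersion equals CLIAccessVersion; the hostname is filled from
//	// state when it is empty and the state provider knows the resource.
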
// lookupHostnameFromState finds the hostname/name for a resource from state.
func (s *Service) lookupHostnameFromState(resourceType ResourceType, hostID, resourceID string, state StateSnapshot) string {
	switch resourceType {
	case ResourceTypeLXC:
		for _, c := range state.Containers {
			if fmt.Sprintf("%d", c.VMID) == resourceID && c.Node == hostID {
				return c.Name
			}
		}
	case ResourceTypeVM:
		for _, vm := range state.VMs {
			if fmt.Sprintf("%d", vm.VMID) == resourceID && vm.Node == hostID {
				return vm.Name
			}
		}
	case ResourceTypeDocker:
		for _, host := range state.DockerHosts {
			if host.AgentID == hostID || host.Hostname == hostID {
				for _, c := range host.Containers {
					if c.Name == resourceID {
						return host.Hostname
					}
				}
			}
		}
	}
	return ""
}

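// Lookup sketch (illustrative; the IDs come from a hypothetical state): for an
// LXC, the resource ID is the numeric VMID rendered as a string and the host ID
// is the PVE node name.
//
//	name := s.lookupHostnameFromState(ResourceTypeLXC, "pve1", "105", state)
//	// name is the container's Name if state.Containers holds VMID 105 on node
//	// "pve1", otherwise "".
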
// UpdateNotes updates user notes for a discovery.
func (s *Service) UpdateNotes(id string, notes string, secrets map[string]string) error {
	return s.store.UpdateNotes(id, notes, secrets)
}

// DeleteDiscovery deletes a discovery.
func (s *Service) DeleteDiscovery(id string) error {
	return s.store.Delete(id)
}

// GetProgress returns the progress of an ongoing discovery.
func (s *Service) GetProgress(resourceID string) *DiscoveryProgress {
	if s.scanner == nil {
		return nil
	}
	return s.scanner.GetProgress(resourceID)
}

// GetStatus returns the service status including fingerprint statistics.
func (s *Service) GetStatus() map[string]any {
	s.mu.RLock()
	defer s.mu.RUnlock()

	s.cacheMu.RLock()
	cacheSize := len(s.analysisCache)
	s.cacheMu.RUnlock()

	// Get fingerprint stats
	fingerprintCount := 0
	var lastFingerprintScan time.Time
	if s.store != nil {
		fingerprintCount = s.store.GetFingerprintCount()
		lastFingerprintScan = s.store.GetLastFingerprintScan()
	}

	return map[string]any{
		"running":               s.running,
		"last_run":              s.lastRun,
		"interval":              s.interval.String(),
		"cache_size":            cacheSize,
		"ai_analyzer_set":       s.aiAnalyzer != nil,
		"scanner_set":           s.scanner != nil,
		"store_set":             s.store != nil,
		"deep_scan_timeout":     s.deepScanTimeout.String(),
		"max_discovery_age":     s.maxDiscoveryAge.String(),
		"fingerprint_count":     fingerprintCount,
		"last_fingerprint_scan": lastFingerprintScan,
	}
}

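// Status sketch (illustrative; the printed values are hypothetical): callers
// such as an HTTP handler can serialize the status map directly.
//
//	status := s.GetStatus()
//	if b, err := json.Marshal(status); err == nil {
//		log.Debug().RawJSON("status", b).Msg("discovery status")
//	}
//	// Example shape: {"running":true,"interval":"6h0m0s","cache_size":12,...}
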
// GetMaxDiscoveryAge returns the current max discovery age (staleness threshold).
func (s *Service) GetMaxDiscoveryAge() time.Duration {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.maxDiscoveryAge
}

// SetMaxDiscoveryAge updates the max discovery age (staleness threshold).
// Discoveries older than this duration will be re-run when requested.
func (s *Service) SetMaxDiscoveryAge(age time.Duration) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Enforce minimum of 1 day
	if age < 24*time.Hour {
		age = 24 * time.Hour
	}

	s.maxDiscoveryAge = age
	log.Info().Dur("max_discovery_age", age).Msg("Max discovery age updated")
}

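// Clamping sketch (illustrative): values below one day are raised to the 24h
// floor, so an aggressive setting cannot cause constant re-discovery.
//
//	s.SetMaxDiscoveryAge(2 * time.Hour)
//	_ = s.GetMaxDiscoveryAge() // 24h0m0s
//	s.SetMaxDiscoveryAge(72 * time.Hour)
//	_ = s.GetMaxDiscoveryAge() // 72h0m0s
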
// ClearCache clears the AI analysis cache.
func (s *Service) ClearCache() {
	s.cacheMu.Lock()
	defer s.cacheMu.Unlock()
	s.analysisCache = make(map[string]*analysisCacheEntry)
}

// --- AI Chat Integration Methods ---

// GetDiscoveryForAIChat returns discovery data for AI chat context.
// It will run discovery if needed (fingerprint changed or no data exists).
// This is the just-in-time discovery approach: only call AI when data is actually needed.
func (s *Service) GetDiscoveryForAIChat(ctx context.Context, resourceType ResourceType, hostID, resourceID string) (*ResourceDiscovery, error) {
	// This is the same as DiscoverResource but without Force
	return s.DiscoverResource(ctx, DiscoveryRequest{
		ResourceType: resourceType,
		ResourceID:   resourceID,
		HostID:       hostID,
		Force:        false, // Let fingerprint logic decide
	})
}

// GetDiscoveriesForAIContext returns discoveries for multiple resources.
// Used when AI chat needs context about the infrastructure.
// Only runs discovery for resources that actually need it (fingerprint changed).
func (s *Service) GetDiscoveriesForAIContext(ctx context.Context, resourceIDs []string) ([]*ResourceDiscovery, error) {
	var results []*ResourceDiscovery
	for _, id := range resourceIDs {
		resourceType, hostID, resourceID, err := ParseResourceID(id)
		if err != nil {
			log.Debug().Err(err).Str("id", id).Msg("Failed to parse resource ID for AI context")
			continue
		}
		discovery, err := s.GetDiscoveryForAIChat(ctx, resourceType, hostID, resourceID)
		if err != nil {
			log.Debug().Err(err).Str("id", id).Msg("Failed to get discovery for AI context")
			continue
		}
		if discovery != nil {
			results = append(results, discovery)
		}
	}
	return results, nil
}

// GetChangedResourceCount returns the count of resources whose fingerprint has changed
// since their last discovery.
func (s *Service) GetChangedResourceCount() (int, error) {
	if s.store == nil {
		return 0, nil
	}
	changed, err := s.store.GetChangedResources()
	if err != nil {
		return 0, err
	}
	return len(changed), nil
}

// GetStaleResourceCount returns the count of resources whose discovery is older
// than maxDiscoveryAge.
func (s *Service) GetStaleResourceCount() (int, error) {
	if s.store == nil {
		return 0, nil
	}
	stale, err := s.store.GetStaleResources(s.maxDiscoveryAge)
	if err != nil {
		return 0, err
	}
	return len(stale), nil
}