mirror of https://github.com/rcourtman/Pulse.git
synced 2026-05-07 17:19:57 +00:00
561 lines · 16 KiB · Go
package servicediscovery
|
|
|
|
import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/google/uuid"
	"github.com/rs/zerolog/log"
)
|
|
|
|
// CommandExecutor executes commands on infrastructure. Implementations
// route command payloads to connected agents and report which agents are
// currently connected.
type CommandExecutor interface {
	// ExecuteCommand runs cmd on the agent identified by agentID and
	// returns the agent's result, or an error if the command could not
	// be dispatched or completed.
	ExecuteCommand(ctx context.Context, agentID string, cmd ExecuteCommandPayload) (*CommandResultPayload, error)
	// GetConnectedAgents lists the currently connected agents.
	GetConnectedAgents() []ConnectedAgent
	// IsAgentConnected reports whether the agent with the given ID is
	// currently connected.
	IsAgentConnected(agentID string) bool
}
// ExecuteCommandPayload mirrors agentexec.ExecuteCommandPayload.
// It describes one command for an agent to run against a target.
type ExecuteCommandPayload struct {
	RequestID  string `json:"request_id"`          // correlates the request with its CommandResultPayload
	Command    string `json:"command"`             // command line for the agent to execute
	TargetType string `json:"target_type"`         // "agent", "container", "vm"
	TargetID   string `json:"target_id,omitempty"` // VMID for container/VM
	Timeout    int    `json:"timeout,omitempty"`   // per-command timeout; units defined by the agent (presumably seconds — TODO confirm)
}
// CommandResultPayload mirrors agentexec.CommandResultPayload.
// It carries the outcome of one executed command back from the agent.
type CommandResultPayload struct {
	RequestID string `json:"request_id"`       // echoes ExecuteCommandPayload.RequestID
	Success   bool   `json:"success"`          // whether the command completed successfully
	Stdout    string `json:"stdout,omitempty"` // captured standard output
	Stderr    string `json:"stderr,omitempty"` // captured standard error
	ExitCode  int    `json:"exit_code"`        // process exit code
	Error     string `json:"error,omitempty"`  // agent-side error description, if any
	Duration  int64  `json:"duration_ms"`      // execution time in milliseconds
}
// ConnectedAgent mirrors agentexec.ConnectedAgent. It describes a
// currently connected agent.
type ConnectedAgent struct {
	AgentID     string    // unique agent identifier
	Hostname    string    // hostname reported by the agent
	Version     string    // agent software version
	Platform    string    // agent platform identifier
	Tags        []string  // labels attached to the agent
	ConnectedAt time.Time // when the agent connected
}
// ProgressCallback is called when discovery progress changes. The value
// passed to it is a private copy (see notifyProgress), so callbacks may
// retain it without racing the scanner.
type ProgressCallback func(*DiscoveryProgress)
// DeepScanner runs discovery commands on resources. All mutable state is
// guarded by mu; maxParallel and timeout fall back to package defaults
// when non-positive (see runtimeSettings).
type DeepScanner struct {
	executor         CommandExecutor               // transport used to run commands on agents
	mu               sync.RWMutex                  // guards progress, maxParallel, timeout, progressCallback
	progress         map[string]*DiscoveryProgress // resourceID -> progress of in-flight scans
	maxParallel      int                           // max concurrent commands per resource
	timeout          time.Duration                 // per-command execution timeout
	progressCallback ProgressCallback              // optional; invoked on progress changes
}
// Defaults applied when a DeepScanner's settings are unset or invalid.
const (
	// defaultDeepScannerMaxParallel bounds concurrent commands per resource.
	defaultDeepScannerMaxParallel = 3
	// defaultDeepScannerTimeout is the per-command execution timeout.
	defaultDeepScannerTimeout = 30 * time.Second
)
// NewDeepScanner creates a new deep scanner.
|
|
func NewDeepScanner(executor CommandExecutor) *DeepScanner {
|
|
return &DeepScanner{
|
|
executor: executor,
|
|
progress: make(map[string]*DiscoveryProgress),
|
|
maxParallel: defaultDeepScannerMaxParallel, // Run up to 3 commands in parallel per resource
|
|
timeout: defaultDeepScannerTimeout,
|
|
}
|
|
}
|
|
|
|
func (s *DeepScanner) runtimeSettings() (int, time.Duration) {
|
|
s.mu.RLock()
|
|
maxParallel := s.maxParallel
|
|
timeout := s.timeout
|
|
s.mu.RUnlock()
|
|
|
|
if maxParallel <= 0 {
|
|
log.Warn().
|
|
Int("max_parallel", maxParallel).
|
|
Int("default", defaultDeepScannerMaxParallel).
|
|
Msg("Invalid deep scanner max parallelism; using default")
|
|
maxParallel = defaultDeepScannerMaxParallel
|
|
}
|
|
if timeout <= 0 {
|
|
log.Warn().
|
|
Dur("timeout", timeout).
|
|
Dur("default", defaultDeepScannerTimeout).
|
|
Msg("Invalid deep scanner timeout; using default")
|
|
timeout = defaultDeepScannerTimeout
|
|
}
|
|
|
|
return maxParallel, timeout
|
|
}
|
|
|
|
// SetProgressCallback sets a callback function that will be called when discovery progress changes.
|
|
func (s *DeepScanner) SetProgressCallback(callback ProgressCallback) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
s.progressCallback = callback
|
|
}
|
|
|
|
// notifyProgress calls the progress callback if set.
|
|
func (s *DeepScanner) notifyProgress(progress *DiscoveryProgress) {
|
|
s.mu.RLock()
|
|
callback := s.progressCallback
|
|
s.mu.RUnlock()
|
|
|
|
if callback != nil && progress != nil {
|
|
// Calculate elapsed time and percent complete
|
|
progressCopy := *progress
|
|
if !progress.StartedAt.IsZero() {
|
|
progressCopy.ElapsedMs = time.Since(progress.StartedAt).Milliseconds()
|
|
}
|
|
if progress.TotalSteps > 0 {
|
|
progressCopy.PercentComplete = float64(progress.CompletedSteps) / float64(progress.TotalSteps) * 100
|
|
}
|
|
callback(&progressCopy)
|
|
}
|
|
}
|
|
|
|
// ScanResult contains the results of a deep scan.
type ScanResult struct {
	ResourceType   ResourceType      // kind of resource that was scanned
	ResourceID     string            // resource identifier from the request
	TargetID       string            // canonical target the commands ran against
	Hostname       string            // hostname from the request
	CommandOutputs map[string]string // command name -> combined stdout/stderr
	Errors         map[string]string // command name -> error for failed non-optional commands
	StartedAt      time.Time         // when the scan began
	CompletedAt    time.Time         // when all commands finished
}
// Scan runs discovery commands on a resource and returns the outputs.
//
// It resolves the canonical target and resource IDs, locates a connected
// agent for the target, then runs every command defined for the resource
// type with bounded parallelism, each under its own timeout. Per-command
// stdout/stderr are collected into the result's CommandOutputs; failures
// of non-optional commands are recorded in Errors. Progress is tracked in
// s.progress (keyed by resourceID) and broadcast via the progress
// callback at start, after each command, and at completion; the progress
// entry is deleted when Scan returns.
//
// A non-nil error is returned only for preconditions (missing executor,
// no connected agent, no commands for the resource type); individual
// command failures never fail the scan.
func (s *DeepScanner) Scan(ctx context.Context, req DiscoveryRequest) (*ScanResult, error) {
	requestTargetID := canonicalRequestTargetID(req)
	resourceID := MakeResourceID(req.ResourceType, requestTargetID, req.ResourceID)
	startTime := time.Now()
	scanLog := log.With().
		Str("component", "service_discovery_scanner").
		Str("resource_id", resourceID).
		Str("resource_type", string(req.ResourceType)).
		Str("target_id", requestTargetID).
		Str("hostname", req.Hostname).
		Logger()

	// Initialize progress
	s.mu.Lock()
	s.progress[resourceID] = &DiscoveryProgress{
		ResourceID:  resourceID,
		Status:      DiscoveryStatusRunning,
		CurrentStep: "initializing",
		StartedAt:   startTime,
	}
	// Snapshot under the lock so the broadcast below can't race with
	// later mutation of the map entry.
	initialProgress := *s.progress[resourceID]
	s.mu.Unlock()

	// Broadcast scan start
	s.notifyProgress(&initialProgress)

	// Remove the progress entry on exit so IsScanning/GetProgress only
	// report in-flight scans.
	defer func() {
		s.mu.Lock()
		delete(s.progress, resourceID)
		s.mu.Unlock()
	}()

	result := &ScanResult{
		ResourceType:   req.ResourceType,
		ResourceID:     req.ResourceID,
		TargetID:       requestTargetID,
		Hostname:       req.Hostname,
		CommandOutputs: make(map[string]string),
		Errors:         make(map[string]string),
		StartedAt:      time.Now(),
	}

	// Check if we have an agent for this host
	if s.executor == nil {
		scanLog.Warn().
			Str("action", "scan_precondition_failed").
			Str("reason", "executor_missing").
			Msg("Deep scan unavailable")
		return nil, fmt.Errorf("no command executor available")
	}

	// Find the agent for this host
	agentID := s.findAgentForTarget(requestTargetID, req.Hostname)
	if agentID == "" {
		scanLog.Warn().
			Str("action", "scan_precondition_failed").
			Str("reason", "agent_not_connected").
			Msg("Deep scan unavailable")
		return nil, fmt.Errorf("no connected agent for target %s (%s)", requestTargetID, req.Hostname)
	}

	// Get commands for this resource type
	commands := GetCommandsForResource(req.ResourceType)
	if len(commands) == 0 {
		scanLog.Warn().
			Str("action", "scan_precondition_failed").
			Str("reason", "commands_not_defined").
			Msg("Deep scan unavailable")
		return nil, fmt.Errorf("no commands defined for resource type %s", req.ResourceType)
	}

	// Update progress
	s.mu.Lock()
	if prog, ok := s.progress[resourceID]; ok {
		prog.TotalSteps = len(commands)
		prog.CurrentStep = "running commands"
		progressCopy := *prog
		s.mu.Unlock()
		s.notifyProgress(&progressCopy)
	} else {
		s.mu.Unlock()
	}

	// Get runtime settings for parallelism and timeouts
	maxParallel, timeout := s.runtimeSettings()

	// Run commands with limited parallelism
	semaphore := make(chan struct{}, maxParallel)
	var wg sync.WaitGroup
	var mu sync.Mutex // guards result's maps, shared across command goroutines

	for _, cmd := range commands {
		wg.Add(1)
		go func(cmd DiscoveryCommand) {
			defer wg.Done()

			// Acquire a parallelism slot, or bail out if the scan
			// context is cancelled while waiting.
			select {
			case semaphore <- struct{}{}:
				defer func() { <-semaphore }()
			case <-ctx.Done():
				return
			}

			// Build the actual command to run
			actualCmd := s.buildCommand(req.ResourceType, req.ResourceID, cmd.Command)

			// Get the target ID for the agent
			targetID := s.getTargetID(req.ResourceType, req.ResourceID)

			// Only validate TargetID when it will be interpolated into shell commands
			// by the agent (container/vm types). Agent/docker types don't use TargetID
			// in command wrapping, so they can have any format (including colons for IPv6).
			targetType := s.getTargetType(req.ResourceType)
			if targetType == "container" || targetType == "vm" {
				if err := ValidateResourceID(targetID); err != nil {
					mu.Lock()
					result.Errors[cmd.Name] = fmt.Sprintf("invalid target ID: %v", err)
					mu.Unlock()
					return
				}
			}

			// Execute the command
			cmdCtx, cancel := context.WithTimeout(ctx, timeout)
			defer cancel()

			cmdResult, err := s.executor.ExecuteCommand(cmdCtx, agentID, ExecuteCommandPayload{
				RequestID:  uuid.New().String(),
				Command:    actualCmd,
				TargetType: s.getTargetType(req.ResourceType),
				TargetID:   targetID,
				Timeout:    cmd.Timeout,
			})

			mu.Lock()
			defer mu.Unlock()

			if err != nil {
				// Transport/execution error: record only for required
				// commands, then return (skipping the progress update).
				if !cmd.Optional {
					result.Errors[cmd.Name] = err.Error()
				}
				scanLog.Debug().
					Str("action", "command_execute_failed").
					Err(err).
					Str("command", cmd.Name).
					Bool("optional", cmd.Optional).
					Msg("Command failed during discovery")
				return
			}

			if cmdResult != nil {
				// Combine stdout and stderr into one output blob;
				// stderr alone is used when stdout is empty.
				output := cmdResult.Stdout
				if cmdResult.Stderr != "" && output != "" {
					output += "\n--- stderr ---\n" + cmdResult.Stderr
				} else if cmdResult.Stderr != "" {
					output = cmdResult.Stderr
				}

				if output != "" {
					result.CommandOutputs[cmd.Name] = output
				}

				if !cmdResult.Success && cmdResult.Error != "" && !cmd.Optional {
					result.Errors[cmd.Name] = cmdResult.Error
				}

				if !cmdResult.Success {
					// Optional-command failures log at debug; required
					// ones at warn.
					event := scanLog.Debug()
					if !cmd.Optional {
						event = scanLog.Warn()
					}
					event.
						Str("action", "command_result_failed").
						Str("command", cmd.Name).
						Bool("optional", cmd.Optional).
						Int("exit_code", cmdResult.ExitCode).
						Str("request_id", cmdResult.RequestID).
						Str("command_error", cmdResult.Error).
						Msg("Deep scan command reported failure")
				}
			}

			// Update progress and broadcast
			// NOTE(review): goroutines that return early (validation or
			// execution error) never reach this, so CompletedSteps can
			// undercount; the completion broadcast after wg.Wait()
			// reports len(commands) regardless.
			s.mu.Lock()
			if prog, ok := s.progress[resourceID]; ok {
				prog.CompletedSteps++
				prog.CurrentCommand = cmd.Name
				progressCopy := *prog
				s.mu.Unlock()
				s.notifyProgress(&progressCopy)
			} else {
				s.mu.Unlock()
			}
		}(cmd)
	}

	wg.Wait()
	result.CompletedAt = time.Now()

	// Broadcast scan completion
	completionProgress := DiscoveryProgress{
		ResourceID:      resourceID,
		Status:          DiscoveryStatusCompleted,
		CurrentStep:     "completed",
		TotalSteps:      len(commands),
		CompletedSteps:  len(commands),
		StartedAt:       startTime,
		ElapsedMs:       result.CompletedAt.Sub(startTime).Milliseconds(),
		PercentComplete: 100,
	}
	s.notifyProgress(&completionProgress)

	scanLog.Info().
		Str("action", "scan_completed").
		Int("outputs", len(result.CommandOutputs)).
		Int("errors", len(result.Errors)).
		Dur("duration", result.CompletedAt.Sub(result.StartedAt)).
		Msg("Deep scan completed")

	return result, nil
}
// buildCommand wraps the command appropriately for the resource type.
|
|
// NOTE: For LXC/VM, the agent handles wrapping via pct exec / qm guest exec
|
|
// based on TargetType, so we don't wrap here. We only wrap for Docker containers
|
|
// since Docker isn't a recognized TargetType in the agent.
|
|
func (s *DeepScanner) buildCommand(resourceType ResourceType, resourceID string, cmd string) string {
|
|
switch resourceType {
|
|
case ResourceTypeSystemContainer:
|
|
// Agent wraps with pct exec based on TargetType="container"
|
|
return cmd
|
|
case ResourceTypeVM:
|
|
// Agent wraps with qm guest exec based on TargetType="vm"
|
|
return cmd
|
|
case ResourceTypeDocker:
|
|
// Docker needs wrapping here since agent doesn't handle it
|
|
return BuildDockerCommand(resourceID, cmd)
|
|
case ResourceTypeAgent:
|
|
// Commands run directly on host
|
|
return cmd
|
|
case ResourceTypeDockerSystemContainer:
|
|
// Docker inside system container - agent wraps with pct exec, we just add docker exec
|
|
// resourceID format: "vmid:container_name"
|
|
parts := splitResourceID(resourceID)
|
|
if len(parts) >= 2 {
|
|
return BuildDockerCommand(parts[1], cmd)
|
|
}
|
|
return cmd
|
|
case ResourceTypeDockerVM:
|
|
// Docker inside VM - agent wraps with qm guest exec, we just add docker exec
|
|
parts := splitResourceID(resourceID)
|
|
if len(parts) >= 2 {
|
|
return BuildDockerCommand(parts[1], cmd)
|
|
}
|
|
return cmd
|
|
default:
|
|
return cmd
|
|
}
|
|
}
|
|
|
|
// getTargetType returns the target type for the agent execution payload.
|
|
func (s *DeepScanner) getTargetType(resourceType ResourceType) string {
|
|
switch resourceType {
|
|
case ResourceTypeSystemContainer:
|
|
return "container"
|
|
case ResourceTypeVM:
|
|
return "vm"
|
|
case ResourceTypeDocker:
|
|
return "agent" // Docker commands run on agent host via docker exec
|
|
case ResourceTypeDockerSystemContainer:
|
|
return "container" // Docker inside system container: agent wraps with pct exec
|
|
case ResourceTypeDockerVM:
|
|
return "vm" // Docker inside VM: agent wraps with qm guest exec
|
|
case ResourceTypeAgent:
|
|
return "agent"
|
|
default:
|
|
return "agent"
|
|
}
|
|
}
|
|
|
|
// getTargetID returns the target ID for the agent execution payload.
|
|
// For nested Docker (docker_lxc/docker_vm), this extracts just the vmid.
|
|
func (s *DeepScanner) getTargetID(resourceType ResourceType, resourceID string) string {
|
|
switch resourceType {
|
|
case ResourceTypeDockerSystemContainer, ResourceTypeDockerVM:
|
|
// resourceID format: "vmid:container_name" - extract just vmid
|
|
parts := splitResourceID(resourceID)
|
|
if len(parts) >= 1 {
|
|
return parts[0]
|
|
}
|
|
return resourceID
|
|
default:
|
|
return resourceID
|
|
}
|
|
}
|
|
|
|
// findAgentForTarget finds the agent ID for a given canonical target.
|
|
func (s *DeepScanner) findAgentForTarget(targetID, hostname string) string {
|
|
agents := s.executor.GetConnectedAgents()
|
|
|
|
log.Debug().
|
|
Str("component", "service_discovery_scanner").
|
|
Str("action", "find_agent_for_target").
|
|
Str("target_id", targetID).
|
|
Str("hostname", hostname).
|
|
Int("connected_agents", len(agents)).
|
|
Msg("Finding agent for target")
|
|
|
|
// Log connected agents for debugging
|
|
for _, agent := range agents {
|
|
log.Debug().
|
|
Str("component", "service_discovery_scanner").
|
|
Str("action", "find_agent_for_target").
|
|
Str("agent_id", agent.AgentID).
|
|
Str("agent_hostname", agent.Hostname).
|
|
Msg("Connected agent")
|
|
}
|
|
|
|
// First try exact match on agent ID
|
|
for _, agent := range agents {
|
|
if agent.AgentID == targetID {
|
|
return agent.AgentID
|
|
}
|
|
}
|
|
|
|
// Then try hostname match
|
|
for _, agent := range agents {
|
|
if agent.Hostname == hostname || agent.Hostname == targetID {
|
|
return agent.AgentID
|
|
}
|
|
}
|
|
|
|
// If only one agent connected, use it
|
|
if len(agents) == 1 {
|
|
return agents[0].AgentID
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
// GetProgress returns a copy of the current progress of a scan.
|
|
// Returns nil if no scan is in progress for the resource.
|
|
// A copy is returned to avoid data races with the scan goroutine.
|
|
func (s *DeepScanner) GetProgress(resourceID string) *DiscoveryProgress {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
if prog, ok := s.progress[resourceID]; ok {
|
|
// Return a copy to avoid race with scan goroutine
|
|
copy := *prog
|
|
return ©
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// IsScanning returns whether a resource is currently being scanned.
|
|
func (s *DeepScanner) IsScanning(resourceID string) bool {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
_, ok := s.progress[resourceID]
|
|
return ok
|
|
}
|
|
|
|
// splitResourceID splits a compound resource ID (e.g., "101:container_name")
// on ':' separators using the standard library instead of a hand-rolled
// scan. To preserve the previous behavior exactly: interior empty segments
// are kept, a single trailing empty segment (from a trailing ':') is
// dropped, and an empty input yields no segments.
func splitResourceID(id string) []string {
	parts := strings.Split(id, ":")
	if n := len(parts); n > 0 && parts[n-1] == "" {
		parts = parts[:n-1]
	}
	return parts
}
// ScanDocker runs discovery on Docker containers via the target agent.
|
|
func (s *DeepScanner) ScanDocker(ctx context.Context, targetID, hostname, containerName string) (*ScanResult, error) {
|
|
req := DiscoveryRequest{
|
|
ResourceType: ResourceTypeDocker,
|
|
ResourceID: containerName,
|
|
TargetID: targetID,
|
|
Hostname: hostname,
|
|
}
|
|
return s.Scan(ctx, req)
|
|
}
|
|
|
|
// ScanSystemContainer runs discovery on a system container (LXC).
|
|
func (s *DeepScanner) ScanSystemContainer(ctx context.Context, targetID, hostname, vmid string) (*ScanResult, error) {
|
|
req := DiscoveryRequest{
|
|
ResourceType: ResourceTypeSystemContainer,
|
|
ResourceID: vmid,
|
|
TargetID: targetID,
|
|
Hostname: hostname,
|
|
}
|
|
return s.Scan(ctx, req)
|
|
}
|
|
|
|
// ScanVM runs discovery on a VM via QEMU guest agent.
|
|
func (s *DeepScanner) ScanVM(ctx context.Context, targetID, hostname, vmid string) (*ScanResult, error) {
|
|
req := DiscoveryRequest{
|
|
ResourceType: ResourceTypeVM,
|
|
ResourceID: vmid,
|
|
TargetID: targetID,
|
|
Hostname: hostname,
|
|
}
|
|
return s.Scan(ctx, req)
|
|
}
|
|
|
|
// ScanHost runs discovery on an agent target system.
|
|
func (s *DeepScanner) ScanHost(ctx context.Context, targetID, hostname string) (*ScanResult, error) {
|
|
req := DiscoveryRequest{
|
|
ResourceType: ResourceTypeAgent,
|
|
ResourceID: targetID,
|
|
TargetID: targetID,
|
|
Hostname: hostname,
|
|
}
|
|
return s.Scan(ctx, req)
|
|
}
|