mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-07 00:37:36 +00:00
416 lines
18 KiB
Go
416 lines
18 KiB
Go
// Package servicediscovery provides AI-powered infrastructure discovery capabilities.
|
|
// It discovers services, versions, configurations, and CLI access methods
|
|
// for VMs, containers, Docker containers, Kubernetes pods, and hosts.
|
|
package servicediscovery
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// ResourceType names the kind of infrastructure resource that a discovery
// record or scan request refers to.
type ResourceType string

// Resource types that classifyResourceType reports as canonical.
const (
	ResourceTypeVM              ResourceType = "vm"
	ResourceTypeSystemContainer ResourceType = "system-container"
	ResourceTypeDocker          ResourceType = "docker"
	ResourceTypeK8s             ResourceType = "k8s"
	ResourceTypeAgent           ResourceType = "agent"
)

// Scan-only execution targets for Docker running inside another canonical
// resource. classifyResourceType reports these as execution-only.
const (
	// ResourceTypeDockerVM targets Docker on a VM.
	ResourceTypeDockerVM ResourceType = "docker_vm"
	// ResourceTypeDockerSystemContainer targets Docker in a system container.
	ResourceTypeDockerSystemContainer ResourceType = "docker_system-container"
)
|
|
|
|
const (
|
|
legacyHostAlias ResourceType = "host"
|
|
legacyResourceTypeLXC ResourceType = "lxc"
|
|
legacyResourceTypeDockerLXC ResourceType = "docker_lxc"
|
|
)
|
|
|
|
// resourceTypeClass is the bucket a ResourceType falls into when it is
// classified by classifyResourceType.
type resourceTypeClass string

const (
	// resourceTypeClassCanonical marks the canonical resource types.
	resourceTypeClassCanonical resourceTypeClass = "canonical"
	// resourceTypeClassExecutionOnly marks the scan-only Docker execution targets.
	resourceTypeClassExecutionOnly resourceTypeClass = "execution-only"
	// resourceTypeClassLegacyAlias marks the legacy spellings.
	resourceTypeClassLegacyAlias resourceTypeClass = "legacy-alias"
	// resourceTypeClassUnknown marks anything that matches no known type.
	resourceTypeClassUnknown resourceTypeClass = "unknown"
)
|
|
|
|
func classifyResourceType(rt ResourceType) resourceTypeClass {
|
|
switch NormalizeResourceType(rt) {
|
|
case ResourceTypeVM, ResourceTypeSystemContainer, ResourceTypeDocker, ResourceTypeK8s, ResourceTypeAgent:
|
|
return resourceTypeClassCanonical
|
|
case ResourceTypeDockerVM, ResourceTypeDockerSystemContainer:
|
|
return resourceTypeClassExecutionOnly
|
|
case legacyHostAlias, legacyResourceTypeLXC, legacyResourceTypeDockerLXC:
|
|
return resourceTypeClassLegacyAlias
|
|
default:
|
|
return resourceTypeClassUnknown
|
|
}
|
|
}
|
|
|
|
// NormalizeResourceType keeps resource type handling whitespace-stable without
|
|
// rewriting legacy or scan-only vocabulary into canonical types.
|
|
func NormalizeResourceType(rt ResourceType) ResourceType {
|
|
return ResourceType(strings.TrimSpace(string(rt)))
|
|
}
|
|
|
|
// IsCanonicalDiscoveryType reports whether the resource type is part of the
|
|
// canonical v6 discovery model and may be persisted or queried as discovery data.
|
|
func (rt ResourceType) IsCanonicalDiscoveryType() bool {
|
|
return classifyResourceType(rt) == resourceTypeClassCanonical
|
|
}
|
|
|
|
// IsExecutionOnlyDiscoveryType reports whether the resource type is only valid
|
|
// for internal scan execution routing and must not be stored as a discovery type.
|
|
func (rt ResourceType) IsExecutionOnlyDiscoveryType() bool {
|
|
return classifyResourceType(rt) == resourceTypeClassExecutionOnly
|
|
}
|
|
|
|
// ValidateCanonicalDiscoveryResourceType enforces that service-discovery
|
|
// persistence and query paths use canonical v6 resource vocabulary only.
|
|
func ValidateCanonicalDiscoveryResourceType(rt ResourceType) error {
|
|
rt = NormalizeResourceType(rt)
|
|
|
|
switch classifyResourceType(rt) {
|
|
case resourceTypeClassCanonical:
|
|
return nil
|
|
case resourceTypeClassExecutionOnly:
|
|
return fmt.Errorf("resource type %q is an execution-only scan type, not a canonical discovery resource type", rt)
|
|
case resourceTypeClassLegacyAlias:
|
|
return fmt.Errorf("resource type %q is a legacy alias, not a canonical discovery resource type", rt)
|
|
default:
|
|
return fmt.Errorf("resource type %q is not a recognized discovery resource type", rt)
|
|
}
|
|
}
|
|
|
|
// FactCategory labels the kind of information a DiscoveryFact carries.
type FactCategory string

// Known fact categories.
const (
	FactCategoryVersion    FactCategory = "version"
	FactCategoryService    FactCategory = "service"
	FactCategoryHardware   FactCategory = "hardware"
	FactCategoryStorage    FactCategory = "storage"
	FactCategoryDependency FactCategory = "dependency"
	FactCategorySecurity   FactCategory = "security"
)
|
|
|
|
// ServiceCategory labels the broad kind of service found on a resource.
type ServiceCategory string

// Known service categories. CategoryUnknown is the explicit "unknown" value.
const (
	CategoryDatabase    ServiceCategory = "database"
	CategoryWebServer   ServiceCategory = "web_server"
	CategoryCache       ServiceCategory = "cache"
	CategoryMonitoring  ServiceCategory = "monitoring"
	CategoryBackup      ServiceCategory = "backup"
	CategoryNVR         ServiceCategory = "nvr"
	CategoryStorage     ServiceCategory = "storage"
	CategoryVirtualizer ServiceCategory = "virtualizer"
	CategoryNetwork     ServiceCategory = "network"
	CategorySecurity    ServiceCategory = "security"
	CategoryMedia       ServiceCategory = "media"
	CategoryHomeAuto    ServiceCategory = "home_automation"
	CategoryUnknown     ServiceCategory = "unknown"
)
|
|
|
|
// ResourceDiscovery is the main data model for discovered resource information.
|
|
type ResourceDiscovery struct {
|
|
// Identity
|
|
ID string `json:"id"` // Unique ID: "system-container:minipc:101"
|
|
ResourceType ResourceType `json:"resource_type"` // vm, system-container, docker, k8s, agent
|
|
ResourceID string `json:"resource_id"` // 101, container-name, etc.
|
|
TargetID string `json:"target_id,omitempty"`
|
|
AgentID string `json:"agent_id,omitempty"`
|
|
Hostname string `json:"hostname"` // Human-readable host name
|
|
|
|
// AI-discovered info
|
|
ServiceType string `json:"service_type"` // frigate, postgres, pbs
|
|
ServiceName string `json:"service_name"` // Human-readable name
|
|
ServiceVersion string `json:"service_version"` // v0.13.2
|
|
Category ServiceCategory `json:"category"` // nvr, database, backup
|
|
CLIAccess string `json:"cli_access"` // pct exec 101 -- ...
|
|
|
|
// Deep discovery facts
|
|
Facts []DiscoveryFact `json:"facts"`
|
|
ConfigPaths []string `json:"config_paths"`
|
|
DataPaths []string `json:"data_paths"`
|
|
LogPaths []string `json:"log_paths"`
|
|
Ports []PortInfo `json:"ports"`
|
|
DockerMounts []DockerBindMount `json:"docker_mounts,omitempty"` // Docker container bind mounts (source->dest)
|
|
|
|
// User-added (also encrypted)
|
|
UserNotes string `json:"user_notes"`
|
|
UserSecrets map[string]string `json:"user_secrets"` // tokens, creds
|
|
|
|
// Metadata
|
|
Confidence float64 `json:"confidence"` // 0-1 confidence score
|
|
AIReasoning string `json:"ai_reasoning"` // AI explanation
|
|
DiscoveredAt time.Time `json:"discovered_at"` // First discovery
|
|
UpdatedAt time.Time `json:"updated_at"` // Last update
|
|
ScanDuration int64 `json:"scan_duration"` // Scan duration in ms
|
|
|
|
// Fingerprint tracking for just-in-time discovery
|
|
Fingerprint string `json:"fingerprint,omitempty"` // Hash when discovery was done
|
|
FingerprintedAt time.Time `json:"fingerprinted_at,omitempty"` // When fingerprint was captured
|
|
FingerprintSchemaVersion int `json:"fingerprint_schema_version,omitempty"` // Schema version when fingerprint was captured
|
|
CLIAccessVersion int `json:"cli_access_version,omitempty"` // Version of CLI access pattern format
|
|
|
|
// Raw data for debugging/re-analysis
|
|
RawCommandOutput map[string]string `json:"raw_command_output,omitempty"`
|
|
|
|
// Auto-suggested web interface URL based on service type and discovered ports
|
|
SuggestedURL string `json:"suggested_url,omitempty"`
|
|
SuggestedURLSourceCode string `json:"suggested_url_source_code,omitempty"`
|
|
SuggestedURLSourceDetail string `json:"suggested_url_source_detail,omitempty"`
|
|
SuggestedURLDiagnostic string `json:"suggested_url_diagnostic,omitempty"`
|
|
}
|
|
|
|
// DiscoveryFact represents a single discovered fact about a resource.
|
|
type DiscoveryFact struct {
|
|
Category FactCategory `json:"category"` // version, config, service, port
|
|
Key string `json:"key"` // e.g., "coral_tpu", "mqtt_broker"
|
|
Value string `json:"value"` // e.g., "/dev/apex_0", "mosquitto:1883"
|
|
Source string `json:"source"` // command that found this
|
|
Confidence float64 `json:"confidence"` // 0-1 confidence for this fact
|
|
DiscoveredAt time.Time `json:"discovered_at"`
|
|
}
|
|
|
|
// PortInfo describes a single listening port found on a resource.
type PortInfo struct {
	Port     int    `json:"port"`
	Protocol string `json:"protocol"` // tcp, udp
	Process  string `json:"process"`  // Name of the owning process
	Address  string `json:"address"`  // Bind address
}
|
|
|
|
// DockerBindMount records one Docker mount as a source/destination pair.
// The distinction matters when editing files: changes have to be made at
// Source, the path on the host filesystem, not at Destination, the path the
// service sees inside the container.
type DockerBindMount struct {
	ContainerName string `json:"container_name"`      // Docker container name
	Source        string `json:"source"`              // Host path (where to actually write files)
	Destination   string `json:"destination"`         // Container path (what the service sees)
	Type          string `json:"type,omitempty"`      // Mount type: bind, volume, tmpfs
	ReadOnly      bool   `json:"read_only,omitempty"` // Whether the mount is read-only
}
|
|
|
|
// MakeResourceID creates a standardized resource ID.
|
|
func MakeResourceID(resourceType ResourceType, targetID, resourceID string) string {
|
|
return fmt.Sprintf("%s:%s:%s", resourceType, targetID, resourceID)
|
|
}
|
|
|
|
// ParseResourceID parses a resource ID into its components.
|
|
func ParseResourceID(id string) (resourceType ResourceType, targetID, resourceID string, err error) {
|
|
var parts [3]string
|
|
count := 0
|
|
start := 0
|
|
for i, c := range id {
|
|
if c == ':' {
|
|
if count < 2 {
|
|
parts[count] = id[start:i]
|
|
count++
|
|
start = i + 1
|
|
}
|
|
}
|
|
}
|
|
if count == 2 {
|
|
parts[2] = id[start:]
|
|
return ResourceType(parts[0]), parts[1], parts[2], nil
|
|
}
|
|
return "", "", "", fmt.Errorf("invalid resource ID format: %s", id)
|
|
}
|
|
|
|
// DiscoveryRequest represents a request to discover a resource.
|
|
type DiscoveryRequest struct {
|
|
ResourceType ResourceType `json:"resource_type"`
|
|
ResourceID string `json:"resource_id"`
|
|
TargetID string `json:"target_id,omitempty"`
|
|
Hostname string `json:"hostname"`
|
|
Force bool `json:"force"` // Force re-scan even if recent
|
|
}
|
|
|
|
// DiscoveryStatus is the state of a discovery scan.
type DiscoveryStatus string

// Known scan states.
const (
	DiscoveryStatusRunning   DiscoveryStatus = "running"
	DiscoveryStatusCompleted DiscoveryStatus = "completed"
)
|
|
|
|
// DiscoveryProgress represents the progress of an ongoing discovery.
|
|
type DiscoveryProgress struct {
|
|
ResourceID string `json:"resource_id"`
|
|
Status DiscoveryStatus `json:"status"`
|
|
CurrentStep string `json:"current_step"`
|
|
CurrentCommand string `json:"current_command,omitempty"`
|
|
TotalSteps int `json:"total_steps"`
|
|
CompletedSteps int `json:"completed_steps"`
|
|
ElapsedMs int64 `json:"elapsed_ms,omitempty"`
|
|
PercentComplete float64 `json:"percent_complete,omitempty"`
|
|
StartedAt time.Time `json:"started_at"`
|
|
Error string `json:"error,omitempty"`
|
|
}
|
|
|
|
// AIProviderInfo identifies the AI provider used for discovery analysis.
type AIProviderInfo struct {
	Provider string `json:"provider"` // e.g. "anthropic", "openai", "ollama"
	Model    string `json:"model"`    // e.g. "claude-haiku-4-5", "gpt-4o"
	IsLocal  bool   `json:"is_local"` // true for ollama (local models)
	Label    string `json:"label"`    // Human-readable label, e.g. "Local (Ollama)" or "Cloud (Anthropic)"
}
|
|
|
|
// DiscoveryInfo provides metadata about the discovery system configuration.
|
|
type DiscoveryInfo struct {
|
|
AIProvider *AIProviderInfo `json:"ai_provider,omitempty"` // Current AI provider info
|
|
Commands []DiscoveryCommand `json:"commands,omitempty"` // Commands that will be run
|
|
CommandCategories []string `json:"command_categories,omitempty"` // Unique categories of commands
|
|
}
|
|
|
|
// UpdateNotesRequest is the payload for updating the user notes, and
// optionally the user secrets, attached to a discovery.
type UpdateNotesRequest struct {
	UserNotes   string            `json:"user_notes"`
	UserSecrets map[string]string `json:"user_secrets,omitempty"`
}
|
|
|
|
// DiscoverySummary provides a summary of discoveries for listing.
|
|
type DiscoverySummary struct {
|
|
ID string `json:"id"`
|
|
ResourceType ResourceType `json:"resource_type"`
|
|
ResourceID string `json:"resource_id"`
|
|
TargetID string `json:"target_id,omitempty"`
|
|
AgentID string `json:"agent_id,omitempty"`
|
|
Hostname string `json:"hostname"`
|
|
ServiceType string `json:"service_type"`
|
|
ServiceName string `json:"service_name"`
|
|
ServiceVersion string `json:"service_version"`
|
|
Category ServiceCategory `json:"category"`
|
|
Confidence float64 `json:"confidence"`
|
|
HasUserNotes bool `json:"has_user_notes"`
|
|
UpdatedAt time.Time `json:"updated_at"`
|
|
Fingerprint string `json:"fingerprint,omitempty"` // Current fingerprint
|
|
NeedsDiscovery bool `json:"needs_discovery"` // True if fingerprint changed
|
|
}
|
|
|
|
// ToSummary converts a full discovery to a summary.
|
|
func (d *ResourceDiscovery) ToSummary() DiscoverySummary {
|
|
targetID := d.TargetID
|
|
|
|
return DiscoverySummary{
|
|
ID: d.ID,
|
|
ResourceType: d.ResourceType,
|
|
ResourceID: d.ResourceID,
|
|
TargetID: targetID,
|
|
AgentID: d.AgentID,
|
|
Hostname: d.Hostname,
|
|
ServiceType: d.ServiceType,
|
|
ServiceName: d.ServiceName,
|
|
ServiceVersion: d.ServiceVersion,
|
|
Category: d.Category,
|
|
Confidence: d.Confidence,
|
|
HasUserNotes: d.UserNotes != "",
|
|
UpdatedAt: d.UpdatedAt,
|
|
Fingerprint: d.Fingerprint,
|
|
NeedsDiscovery: false, // Will be set by caller if fingerprint changed
|
|
}
|
|
}
|
|
|
|
// AIAnalysisRequest is sent to the AI for analysis.
|
|
type AIAnalysisRequest struct {
|
|
ResourceType ResourceType `json:"resource_type"`
|
|
ResourceID string `json:"resource_id"`
|
|
TargetID string `json:"target_id"`
|
|
Hostname string `json:"hostname"`
|
|
CommandOutputs map[string]string `json:"command_outputs"`
|
|
ExistingFacts []DiscoveryFact `json:"existing_facts,omitempty"`
|
|
Metadata map[string]any `json:"metadata,omitempty"` // Image, labels, etc.
|
|
}
|
|
|
|
// ServiceStatus is a typed snapshot of the discovery service's runtime
// status. It carries no JSON tags; ToMap provides the snake_case map form.
type ServiceStatus struct {
	Running             bool
	LastRun             time.Time
	Interval            string
	CacheSize           int
	AIAnalyzerSet       bool
	ScannerSet          bool
	StoreSet            bool
	DeepScanTimeout     string
	AIAnalysisTimeout   string
	MaxDiscoveryAge     string
	FingerprintCount    int
	LastFingerprintScan time.Time
}
|
|
|
|
// ToMap converts the status snapshot to a legacy map representation.
|
|
func (s ServiceStatus) ToMap() map[string]any {
|
|
return map[string]any{
|
|
"running": s.Running,
|
|
"last_run": s.LastRun,
|
|
"interval": s.Interval,
|
|
"cache_size": s.CacheSize,
|
|
"ai_analyzer_set": s.AIAnalyzerSet,
|
|
"scanner_set": s.ScannerSet,
|
|
"store_set": s.StoreSet,
|
|
"deep_scan_timeout": s.DeepScanTimeout,
|
|
"ai_analysis_timeout": s.AIAnalysisTimeout,
|
|
"max_discovery_age": s.MaxDiscoveryAge,
|
|
"fingerprint_count": s.FingerprintCount,
|
|
"last_fingerprint_scan": s.LastFingerprintScan,
|
|
}
|
|
}
|
|
|
|
// AIAnalysisResponse is returned by the AI.
|
|
type AIAnalysisResponse struct {
|
|
ServiceType string `json:"service_type"`
|
|
ServiceName string `json:"service_name"`
|
|
ServiceVersion string `json:"service_version"`
|
|
Category ServiceCategory `json:"category"`
|
|
CLIAccess string `json:"cli_access"`
|
|
Facts []DiscoveryFact `json:"facts"`
|
|
ConfigPaths []string `json:"config_paths"`
|
|
DataPaths []string `json:"data_paths"`
|
|
LogPaths []string `json:"log_paths"`
|
|
Ports []PortInfo `json:"ports"`
|
|
Confidence float64 `json:"confidence"`
|
|
Reasoning string `json:"reasoning"`
|
|
}
|
|
|
|
// FingerprintSchemaVersion is incremented when the fingerprint algorithm changes.
// This prevents mass rediscovery when we add new fields to the fingerprint hash.
// Old fingerprints with different schema versions are treated as "schema changed"
// rather than "container changed", allowing for more controlled migration.
const FingerprintSchemaVersion = 3 // v3: Removed IP addresses (DHCP churn caused false positives)

// CLIAccessVersion is incremented when the CLI access pattern format changes.
// When a discovery has an older version, its CLIAccess field is regenerated
// to use the new instructional format.
const CLIAccessVersion = 2 // v2: Changed from shell commands to pulse_control instructions

// ContainerFingerprint captures the key metadata that indicates a container changed.
// This is used for just-in-time discovery - only running discovery when something
// actually changed rather than on a fixed timer.
type ContainerFingerprint struct {
	ResourceID    string    `json:"resource_id"`
	TargetID      string    `json:"target_id"`
	Hash          string    `json:"hash"`           // SHA256 of metadata (truncated to 16 chars)
	SchemaVersion int       `json:"schema_version"` // Version of fingerprint algorithm
	GeneratedAt   time.Time `json:"generated_at"`

	// Components that went into the hash (for debugging).
	ImageID    string   `json:"image_id,omitempty"`
	ImageName  string   `json:"image_name,omitempty"`
	Ports      []string `json:"ports,omitempty"`
	MountPaths []string `json:"mount_paths,omitempty"`
	EnvKeys    []string `json:"env_keys,omitempty"`   // Keys only, not values (security)
	CreatedAt  string   `json:"created_at,omitempty"` // Container creation time
}
|
|
|
|
// IsSchemaOutdated returns true if this fingerprint was created with an older schema.
|
|
func (fp *ContainerFingerprint) IsSchemaOutdated() bool {
|
|
return fp.SchemaVersion < FingerprintSchemaVersion
|
|
}
|