Pulse/internal/ai/routing.go
rcourtman 3fdf753a5b Enhance devcontainer and CI workflows
- Add persistent volume mounts for Go/npm caches (faster rebuilds)
- Add shell config with helpful aliases and custom prompt
- Add comprehensive devcontainer documentation
- Add pre-commit hooks for Go formatting and linting
- Use go-version-file in CI workflows instead of hardcoded versions
- Simplify docker compose commands with --wait flag
- Add gitignore entries for devcontainer auth files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-01 22:29:15 +00:00

383 lines
12 KiB
Go

// Package ai provides AI-powered diagnostic and command execution capabilities.
// This file contains the robust agent routing logic for executing commands on the correct host.
package ai
import (
"fmt"
"strconv"
"strings"
"github.com/rcourtman/pulse-go-rewrite/internal/agentexec"
"github.com/rcourtman/pulse-go-rewrite/internal/config"
"github.com/rs/zerolog/log"
)
// RoutingResult describes a successful routing decision: the agent that was
// selected and the information used to select it.
type RoutingResult struct {
	// AgentID is the ID of the selected agent.
	AgentID string
	// AgentHostname is the hostname of the selected agent.
	AgentHostname string
	// TargetNode is the node the command is trying to reach.
	TargetNode string
	// TargetVMID is the VMID, for container/VM targets.
	TargetVMID string
	// RoutingMethod records how the route was determined (for debugging).
	RoutingMethod string
	// ClusterPeer is true when the command is routed via a cluster peer.
	ClusterPeer bool
	// Warnings collects any warnings encountered during routing.
	Warnings []string
}
// RoutingError is returned when no agent can be chosen for a command. It
// carries enough detail to tell the caller what went wrong and what to do next.
type RoutingError struct {
	// TargetNode is the node that routing was trying to reach, if known.
	TargetNode string
	// TargetVMID is the VMID involved in the failure, if any.
	TargetVMID int
	// AvailableAgents lists the hostnames of the currently connected agents.
	AvailableAgents []string
	// Reason says why routing failed.
	Reason string
	// Suggestion is an optional hint on how to resolve the failure.
	Suggestion string
	// AskForClarification, when true, means the AI should ask the user which
	// host to use.
	AskForClarification bool
}

// Error implements the error interface. It returns Reason alone when there is
// no Suggestion, and "Reason. Suggestion" otherwise.
func (e *RoutingError) Error() string {
	if e.Suggestion == "" {
		return e.Reason
	}
	return fmt.Sprintf("%s. %s", e.Reason, e.Suggestion)
}

// ForAI renders the error as a tool result for the AI.
//
// When clarification is requested and at least one agent is available, the
// message tells the AI to ask the user which host to use instead of retrying.
// In every other case it is the same text as Error.
func (e *RoutingError) ForAI() string {
	needsClarification := e.AskForClarification && len(e.AvailableAgents) > 0
	if !needsClarification {
		return e.Error()
	}

	hosts := strings.Join(e.AvailableAgents, ", ")
	return fmt.Sprintf(
		"ROUTING_CLARIFICATION_NEEDED: %s\n\n"+
			"Available hosts: %s\n\n"+
			"Please ask the user which host they want to run this command on. "+
			"Do NOT try the command again until the user specifies which host. "+
			"Present the available hosts in a friendly way and ask them to clarify.",
		e.Reason, hosts)
}
// routeToAgent determines which connected agent should execute a command.
// This is the authoritative routing function that should be used for all
// command execution.
//
// The target node is resolved by the first of these that yields a node:
//  1. VMID lookup from the command (for commands that must run on the node
//     owning the guest, e.g. pct/qm)
//  2. Unified ResourceProvider lookup by resource name taken from the context
//  3. Explicit host fields in the request context (backwards compatibility)
//  4. VMID extracted from the target ID
//
// Once a target node is known, an agent is selected by:
//  5. Exact, case-insensitive hostname match (no substring matching, to
//     prevent false positives), then cluster peer routing
//  6. If no target node could be determined at all, a "host" target with
//     exactly one connected agent is routed to that agent
//
// Parameters:
//   - req: the execution request; Context, TargetID and TargetType are read.
//   - command: the command line to execute; inspected for an embedded VMID.
//   - agents: the currently connected agents.
//
// Returns the routing decision, or a *RoutingError when no agent can be
// selected. Routing never silently falls back to an unrelated agent.
func (s *Service) routeToAgent(req ExecuteRequest, command string, agents []agentexec.ConnectedAgent) (*RoutingResult, error) {
	if len(agents) == 0 {
		return nil, &RoutingError{
			Reason:     "No agents are connected to Pulse",
			Suggestion: "Install pulse-agent on at least one host",
		}
	}

	result := &RoutingResult{}

	// Index agents by normalized (trimmed, lowercase) hostname for exact
	// lookup, and keep the original hostnames for error messages.
	agentMap := make(map[string]agentexec.ConnectedAgent, len(agents))
	agentHostnames := make([]string, 0, len(agents))
	for _, agent := range agents {
		hostname := strings.TrimSpace(strings.ToLower(agent.Hostname))
		agentMap[hostname] = agent
		agentHostnames = append(agentHostnames, agent.Hostname)
	}

	// Optional instance hint from the context, used to disambiguate VMIDs.
	// A missing or non-string value leaves this empty.
	targetInstance, _ := req.Context["instance"].(string)

	// Step 1: Try VMID-based routing (most authoritative for pct/qm commands)
	if vmid, requiresOwnerNode, found := extractVMIDFromCommand(command); found && requiresOwnerNode {
		guests := s.lookupGuestsByVMID(vmid, targetInstance)
		if len(guests) == 0 {
			result.Warnings = append(result.Warnings,
				fmt.Sprintf("VMID %d not found in Pulse state - routing based on context", vmid))
		} else if len(guests) == 1 {
			result.TargetNode = strings.ToLower(guests[0].Node)
			result.RoutingMethod = "vmid_lookup"
			log.Info().
				Int("vmid", vmid).
				Str("node", guests[0].Node).
				Str("guest_name", guests[0].Name).
				Msg("Routed command via VMID state lookup")
		} else {
			// Multiple matches - try to disambiguate by instance
			if targetInstance != "" {
				for _, g := range guests {
					if strings.EqualFold(g.Instance, targetInstance) {
						result.TargetNode = strings.ToLower(g.Node)
						result.RoutingMethod = "vmid_lookup_with_instance"
						log.Info().
							Int("vmid", vmid).
							Str("node", g.Node).
							Str("instance", g.Instance).
							Msg("Resolved VMID collision using instance")
						break
					}
				}
			}

			if result.TargetNode == "" {
				// Return an explicit error for the VMID collision rather than guessing
				locations := make([]string, 0, len(guests))
				for _, g := range guests {
					locations = append(locations, fmt.Sprintf("%s on %s (%s)", g.Name, g.Node, g.Instance))
				}
				return nil, &RoutingError{
					TargetVMID:      vmid,
					AvailableAgents: agentHostnames,
					Reason: fmt.Sprintf("VMID %d exists on multiple nodes: %s",
						vmid, strings.Join(locations, ", ")),
					Suggestion: "Specify the instance/cluster in your query to disambiguate",
				}
			}
		}
	}

	// Step 2: Try unified ResourceProvider lookup (PRIMARY method for workloads)
	// The provider maps a resource name to the host it runs on.
	if result.TargetNode == "" {
		s.mu.RLock()
		rp := s.resourceProvider
		s.mu.RUnlock()

		if rp != nil {
			// Use the first non-empty name among the known context keys.
			resourceName := ""
			for _, key := range []string{"containerName", "name", "guestName"} {
				if name, ok := req.Context[key].(string); ok && name != "" {
					resourceName = name
					break
				}
			}

			if resourceName != "" {
				if host := rp.FindContainerHost(resourceName); host != "" {
					result.TargetNode = strings.ToLower(host)
					result.RoutingMethod = "resource_provider_lookup"
					log.Info().
						Str("resource_name", resourceName).
						Str("host", host).
						Str("target_type", req.TargetType).
						Str("command", command).
						Msg("Routing via unified ResourceProvider")
				}
			}
		}
	}

	// Step 3: Fallback to explicit context fields (backwards compatibility)
	// These are checked in order of specificity.
	if result.TargetNode == "" {
		hostFields := []string{"node", "host", "guest_node", "hostname", "host_name", "target_host"}
		for _, field := range hostFields {
			value, ok := req.Context[field].(string)
			if !ok {
				continue
			}
			// A whitespace-only value carries no host information; treat it
			// as absent so it cannot become a bogus target node.
			node := strings.TrimSpace(value)
			if node == "" {
				continue
			}
			result.TargetNode = strings.ToLower(node)
			result.RoutingMethod = "context_" + field
			log.Debug().
				Str("field", field).
				Str("value", value).
				Str("command", command).
				Msg("Routing via context field (fallback)")
			break
		}
	}

	// Step 4: Extract VMID from target ID and look up in state
	if result.TargetNode == "" && req.TargetID != "" {
		if vmid := extractVMIDFromTargetID(req.TargetID); vmid > 0 {
			result.TargetVMID = strconv.Itoa(vmid)

			// Only an unambiguous (single) match is used here; zero or
			// multiple matches leave the target node undetermined.
			guests := s.lookupGuestsByVMID(vmid, targetInstance)
			if len(guests) == 1 {
				result.TargetNode = strings.ToLower(guests[0].Node)
				result.RoutingMethod = "target_id_vmid_lookup"
				log.Debug().
					Int("vmid", vmid).
					Str("node", guests[0].Node).
					Str("target_id", req.TargetID).
					Msg("Resolved node from target ID VMID lookup")
			}
		}
	}

	// Step 5: A target node is known - find the agent for it
	if result.TargetNode != "" {
		targetNodeClean := strings.TrimSpace(strings.ToLower(result.TargetNode))

		// EXACT match only - no substring matching
		if agent, exists := agentMap[targetNodeClean]; exists {
			result.AgentID = agent.AgentID
			result.AgentHostname = agent.Hostname
			log.Debug().
				Str("target_node", result.TargetNode).
				Str("agent", agent.Hostname).
				Str("method", result.RoutingMethod).
				Msg("Exact agent match found")
			return result, nil
		}

		// Try cluster peer routing.
		// NOTE(review): this also applies when the target node came from
		// Step 1, i.e. for commands flagged as requiring the owner node.
		// Presumably the peer can still reach the guest; confirm that this
		// is intended for pct exec / qm guest exec style commands.
		if peerAgentID := s.findClusterPeerAgent(targetNodeClean, agents); peerAgentID != "" {
			for _, agent := range agents {
				if agent.AgentID == peerAgentID {
					result.AgentID = peerAgentID
					result.AgentHostname = agent.Hostname
					result.ClusterPeer = true
					log.Info().
						Str("target_node", result.TargetNode).
						Str("peer_agent", agent.Hostname).
						Msg("Routing via cluster peer agent")
					return result, nil
				}
			}
		}

		// No agent available for this node
		return nil, &RoutingError{
			TargetNode:      result.TargetNode,
			AvailableAgents: agentHostnames,
			Reason:          fmt.Sprintf("No agent connected to node %q", result.TargetNode),
			Suggestion: fmt.Sprintf("Install pulse-agent on %q, or ensure it's in a cluster with %s",
				result.TargetNode, strings.Join(agentHostnames, ", ")),
		}
	}

	// Step 6: No target node determined - for host commands with no context,
	// fall back to the agent only when exactly one is connected, so the
	// choice is unambiguous.
	if req.TargetType == "host" && len(agents) == 1 {
		only := agents[0]
		result.AgentID = only.AgentID
		result.AgentHostname = only.Hostname
		result.RoutingMethod = "single_agent_fallback"
		result.Warnings = append(result.Warnings,
			fmt.Sprintf("No target node specified, using the only connected agent (%s). For multi-agent setups, specify target_host.", only.Hostname))
		log.Info().
			Str("agent", only.Hostname).
			Str("target_type", req.TargetType).
			Msg("Routing via single-agent fallback")
		return result, nil
	}

	// Cannot determine where to route.
	// Provide an actionable error with the available agents listed.
	log.Error().
		Str("target_type", req.TargetType).
		Str("target_id", req.TargetID).
		Strs("available_agents", agentHostnames).
		Msg("Routing failed - cannot determine target agent")

	return nil, &RoutingError{
		AvailableAgents:     agentHostnames,
		Reason:              "Cannot determine which host should execute this command",
		Suggestion:          fmt.Sprintf("Please specify which host: %s", strings.Join(agentHostnames, ", ")),
		AskForClarification: true,
	}
}
// extractVMIDFromTargetID pulls a positive numeric VMID out of a target ID.
//
// The ID is read as hyphen-separated segments, scanned from right to left;
// the first segment that parses as an integer greater than zero is returned.
// Examples:
//   - "delly-minipc-106" -> 106
//   - "minipc-106"       -> 106
//   - "106"              -> 106
//   - "lxc-106"          -> 106
//   - "vm-106"           -> 106
//
// It returns 0 when no segment is a positive integer (including for "").
func extractVMIDFromTargetID(targetID string) int {
	remaining := targetID
	for {
		// With no hyphen left, sep is -1 and the segment is all of remaining.
		sep := strings.LastIndexByte(remaining, '-')
		segment := remaining[sep+1:]

		if n, err := strconv.Atoi(segment); err == nil && n > 0 {
			return n
		}
		if sep < 0 {
			// That was the leftmost segment; nothing numeric was found.
			return 0
		}
		remaining = remaining[:sep]
	}
}
// findClusterPeerAgent finds an agent that can execute commands for a node in
// the same cluster. For PVE clusters, any node can execute pvesh/vzdump
// commands, but pct exec/qm guest exec require the agent to be on the specific
// node. This function does not inspect the command; it only resolves cluster
// membership.
//
// The target's cluster is taken from the first configured PVE instance that
// matches targetNode, either by instance name or by one of its cluster
// endpoint node names (case-insensitive). The first agent in agents whose
// hostname equals a node name of that cluster is then chosen.
//
// Returns the chosen agent's ID, or "" when persistence is unavailable, the
// nodes config cannot be loaded, the target is not in a named cluster, or no
// connected agent is a member of that cluster.
func (s *Service) findClusterPeerAgent(targetNode string, agents []agentexec.ConnectedAgent) string {
	if s.persistence == nil {
		return ""
	}

	// Load nodes config to check cluster membership. Failure is not fatal:
	// peer routing is best-effort, so report "no peer" to the caller.
	nodesConfig, err := s.persistence.LoadNodesConfig()
	if err != nil {
		log.Debug().
			Err(err).
			Str("target_node", targetNode).
			Msg("Cluster peer lookup skipped: failed to load nodes config")
		return ""
	}
	if nodesConfig == nil {
		return ""
	}

	// Find which cluster the target node belongs to.
	var targetCluster string
	var clusterEndpoints []config.ClusterEndpoint
	for _, pve := range nodesConfig.PVEInstances {
		nameMatch := strings.EqualFold(pve.Name, targetNode)

		endpointMatch := false
		if !nameMatch {
			for _, ep := range pve.ClusterEndpoints {
				if strings.EqualFold(ep.NodeName, targetNode) {
					endpointMatch = true
					break
				}
			}
		}
		if !nameMatch && !endpointMatch {
			continue
		}

		if pve.IsCluster && pve.ClusterName != "" {
			// Stop at the first cluster that contains the target, so a later
			// instance cannot overwrite the result.
			targetCluster = pve.ClusterName
			clusterEndpoints = pve.ClusterEndpoints
			break
		}
		if nameMatch {
			// The instance itself is the target but is not a named cluster:
			// there are no peers to route through.
			break
		}
		// Endpoint matched on an instance that is not a named cluster; keep
		// scanning the remaining instances.
	}

	if targetCluster == "" {
		return ""
	}

	// Build the set of cluster member node names, normalized the same way
	// agent hostnames are normalized for matching. Empty names are skipped
	// so they can never match an agent with an empty hostname.
	clusterNodes := make(map[string]struct{}, len(clusterEndpoints))
	for _, ep := range clusterEndpoints {
		if name := strings.ToLower(strings.TrimSpace(ep.NodeName)); name != "" {
			clusterNodes[name] = struct{}{}
		}
	}

	// Find an agent on any cluster member.
	for _, agent := range agents {
		agentHostname := strings.ToLower(strings.TrimSpace(agent.Hostname))
		if _, member := clusterNodes[agentHostname]; !member {
			continue
		}
		log.Debug().
			Str("target_node", targetNode).
			Str("cluster", targetCluster).
			Str("peer_agent", agent.Hostname).
			Msg("Found cluster peer agent")
		return agent.AgentID
	}

	return ""
}