diff --git a/frontend-modern/src/components/Dashboard/Dashboard.tsx b/frontend-modern/src/components/Dashboard/Dashboard.tsx index ecc1b4296..0262ae07d 100644 --- a/frontend-modern/src/components/Dashboard/Dashboard.tsx +++ b/frontend-modern/src/components/Dashboard/Dashboard.tsx @@ -353,10 +353,17 @@ export function Dashboard(props: DashboardProps) { }; // Create a mapping from node ID to node object + // Also maps by instance-nodeName for guest grouping compatibility const nodeByInstance = createMemo(() => { const map: Record = {}; props.nodes.forEach((node) => { + // Map by node.id (may be clusterName-nodeName or instance-nodeName) map[node.id] = node; + // Also map by instance-nodeName for guest grouping (guests use instance-node format) + const legacyKey = `${node.instance}-${node.name}`; + if (!map[legacyKey]) { + map[legacyKey] = node; + } }); return map; }); diff --git a/frontend-modern/src/components/FirstRunSetup.tsx b/frontend-modern/src/components/FirstRunSetup.tsx index b77740ed5..29e36b58e 100644 --- a/frontend-modern/src/components/FirstRunSetup.tsx +++ b/frontend-modern/src/components/FirstRunSetup.tsx @@ -149,8 +149,8 @@ export const FirstRunSetup: Component<{ force?: boolean; showLegacyBanner?: bool showError('Passwords do not match'); return; } - if (password().length < 12) { - showError('Password must be at least 12 characters'); + if (password().length < 1) { + showError('Password cannot be empty'); return; } } @@ -426,7 +426,7 @@ IMPORTANT: Keep these credentials secure! type="text" value={bootstrapToken()} onInput={(e) => setBootstrapToken(e.currentTarget.value)} - onKeyPress={(e) => e.key === 'Enter' && handleUnlock()} + onKeyPress={(e) => e.key === 'Enter' && handleUnlock()} class="w-full px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:ring-2 focus:ring-blue-500 focus:border-transparent font-mono text-sm" placeholder="Paste the token from your host" autofocus @@ -494,22 +494,20 @@ IMPORTANT: Keep these credentials secure! @@ -522,7 +520,7 @@ IMPORTANT: Keep these credentials secure! value={password()} onInput={(e) => setPassword(e.currentTarget.value)} class="w-full px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:ring-2 focus:ring-blue-500 focus:border-transparent" - placeholder="Enter password (min 12 characters)" + placeholder="Enter password" /> System @@ -570,11 +567,10 @@ IMPORTANT: Keep these credentials secure! setThemeMode('light'); applyTheme('light'); }} - class={`py-2 px-4 rounded-lg text-sm font-medium transition-colors ${ - themeMode() === 'light' - ? 'bg-blue-600 text-white' - : 'bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 hover:bg-gray-300 dark:hover:bg-gray-600' - }`} + class={`py-2 px-4 rounded-lg text-sm font-medium transition-colors ${themeMode() === 'light' + ? 'bg-blue-600 text-white' + : 'bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 hover:bg-gray-300 dark:hover:bg-gray-600' + }`} > Light @@ -584,11 +580,10 @@ IMPORTANT: Keep these credentials secure! setThemeMode('dark'); applyTheme('dark'); }} - class={`py-2 px-4 rounded-lg text-sm font-medium transition-colors ${ - themeMode() === 'dark' - ? 'bg-blue-600 text-white' - : 'bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 hover:bg-gray-300 dark:hover:bg-gray-600' - }`} + class={`py-2 px-4 rounded-lg text-sm font-medium transition-colors ${themeMode() === 'dark' + ? 'bg-blue-600 text-white' + : 'bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 hover:bg-gray-300 dark:hover:bg-gray-600' + }`} > Dark diff --git a/frontend-modern/src/stores/websocket.ts b/frontend-modern/src/stores/websocket.ts index 89acdc008..020f17a7a 100644 --- a/frontend-modern/src/stores/websocket.ts +++ b/frontend-modern/src/stores/websocket.ts @@ -710,7 +710,7 @@ export function createWebSocketStore(url: string) { const nodeType = node.type === 'pve' ? 'Proxmox VE' : 'Proxmox Backup Server'; notificationStore.success( - `🎉 ${nodeType} node "${nodeName}" was successfully auto-registered and is now being monitored!`, + `${nodeType} node "${nodeName}" was successfully auto-registered and is now being monitored!`, 8000, ); logger.info('Node auto-registered:', node); diff --git a/internal/api/config_handlers.go b/internal/api/config_handlers.go index e0c653476..0150c45f5 100644 --- a/internal/api/config_handlers.go +++ b/internal/api/config_handlers.go @@ -1311,11 +1311,70 @@ func (h *ConfigHandlers) HandleAddNode(w http.ResponseWriter, r *http.Request) { isCluster, clusterName, clusterEndpoints = detectPVECluster(clientConfig, req.Name, nil) } + // CLUSTER DEDUPLICATION: If this node is part of a cluster, check if we already + // have that cluster configured. If so, this is a duplicate - we should merge + // the node as an endpoint to the existing cluster instead of creating a new instance. + // This prevents duplicate VMs/containers when users install agents on multiple cluster nodes. + if isCluster && clusterName != "" { + for i := range h.config.PVEInstances { + existingInstance := &h.config.PVEInstances[i] + if existingInstance.IsCluster && existingInstance.ClusterName == clusterName { + // Found existing cluster with same name - merge endpoints! + log.Info(). + Str("cluster", clusterName). + Str("existingInstance", existingInstance.Name). + Str("newNode", req.Name). + Msg("New node belongs to already-configured cluster - merging as endpoint instead of creating duplicate") + + // Merge any new endpoints from the detected cluster + existingEndpointMap := make(map[string]bool) + for _, ep := range existingInstance.ClusterEndpoints { + existingEndpointMap[ep.NodeName] = true + } + for _, newEp := range clusterEndpoints { + if !existingEndpointMap[newEp.NodeName] { + existingInstance.ClusterEndpoints = append(existingInstance.ClusterEndpoints, newEp) + log.Info(). + Str("cluster", clusterName). + Str("endpoint", newEp.NodeName). + Msg("Added new endpoint to existing cluster") + } + } + + // Save the updated configuration + if h.persistence != nil { + if err := h.persistence.SaveNodesConfig(h.config.PVEInstances, h.config.PBSInstances, h.config.PMGInstances); err != nil { + log.Warn().Err(err).Msg("Failed to persist cluster endpoint merge") + } + } + + // Reload the monitor to pick up the updated endpoints + if h.reloadFunc != nil { + if err := h.reloadFunc(); err != nil { + log.Warn().Err(err).Msg("Failed to reload monitor after cluster merge") + } + } + + // Return success - the cluster is now updated with new endpoints + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "success": true, + "merged": true, + "cluster": clusterName, + "existingNode": existingInstance.Name, + "message": fmt.Sprintf("Node merged into existing cluster '%s' (already configured as '%s')", clusterName, existingInstance.Name), + "totalEndpoints": len(existingInstance.ClusterEndpoints), + }) + return + } + } + } + if isCluster { log.Info(). Str("cluster", clusterName). Int("endpoints", len(clusterEndpoints)). - Msg("Detected Proxmox cluster, auto-discovering all nodes") + Msg("Detected new Proxmox cluster, auto-discovering all nodes") } // Use sensible defaults for boolean fields if not provided @@ -5857,6 +5916,61 @@ func (h *ConfigHandlers) HandleAutoRegister(w http.ResponseWriter, r *http.Reque isCluster, clusterName, clusterEndpoints := detectPVECluster(clientConfig, nodeConfig.Name, nil) + // CLUSTER DEDUPLICATION: Check if we already have this cluster configured + // If so, merge this node as an endpoint instead of creating a duplicate instance + if isCluster && clusterName != "" { + for i := range h.config.PVEInstances { + existingInstance := &h.config.PVEInstances[i] + if existingInstance.IsCluster && existingInstance.ClusterName == clusterName { + // Found existing cluster with same name - merge endpoints! + log.Info(). + Str("cluster", clusterName). + Str("existingInstance", existingInstance.Name). + Str("newNode", nodeConfig.Name). + Msg("Auto-registered node belongs to already-configured cluster - merging endpoints") + + // Merge any new endpoints from the detected cluster + existingEndpointMap := make(map[string]bool) + for _, ep := range existingInstance.ClusterEndpoints { + existingEndpointMap[ep.NodeName] = true + } + for _, newEp := range clusterEndpoints { + if !existingEndpointMap[newEp.NodeName] { + existingInstance.ClusterEndpoints = append(existingInstance.ClusterEndpoints, newEp) + log.Info(). + Str("cluster", clusterName). + Str("endpoint", newEp.NodeName). + Msg("Added new endpoint to existing cluster via auto-registration") + } + } + + // Save and reload + if h.persistence != nil { + if err := h.persistence.SaveNodesConfig(h.config.PVEInstances, h.config.PBSInstances, h.config.PMGInstances); err != nil { + log.Warn().Err(err).Msg("Failed to persist cluster endpoint merge during auto-registration") + } + } + if h.reloadFunc != nil { + if err := h.reloadFunc(); err != nil { + log.Warn().Err(err).Msg("Failed to reload monitor after cluster merge during auto-registration") + } + } + + // Return success - merged into existing cluster + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "success": true, + "merged": true, + "cluster": clusterName, + "existingNode": existingInstance.Name, + "message": fmt.Sprintf("Agent merged into existing cluster '%s'", clusterName), + "totalEndpoints": len(existingInstance.ClusterEndpoints), + }) + return + } + } + } + monitorVMs := true if nodeConfig.MonitorVMs != nil { monitorVMs = *nodeConfig.MonitorVMs @@ -5895,7 +6009,7 @@ func (h *ConfigHandlers) HandleAutoRegister(w http.ResponseWriter, r *http.Reque log.Info(). Str("cluster", clusterName). Int("endpoints", len(clusterEndpoints)). - Msg("Added Proxmox cluster via auto-registration") + Msg("Added new Proxmox cluster via auto-registration") } } else { verifySSL := false diff --git a/internal/models/models.go b/internal/models/models.go index 4d70352dc..7fae15953 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -1267,15 +1267,14 @@ func (s *State) UpdateNodesForInstance(instanceName string, nodes []Node) { s.mu.Lock() defer s.mu.Unlock() - // Create a map of existing nodes, excluding those from this instance + // Build a map of ALL existing nodes by ID (not filtered by instance) + // This handles cluster-based IDs where the same node ID comes from multiple instances // Also preserve LinkedHostAgentID for nodes that are being updated existingNodeLinks := make(map[string]string) // nodeID -> linkedHostAgentID nodeMap := make(map[string]Node) for _, node := range s.Nodes { - if node.Instance != instanceName { - nodeMap[node.ID] = node - } else if node.LinkedHostAgentID != "" { - // Preserve the link for nodes from this instance + nodeMap[node.ID] = node + if node.LinkedHostAgentID != "" { existingNodeLinks[node.ID] = node.LinkedHostAgentID } } diff --git a/internal/monitoring/helpers_test.go b/internal/monitoring/helpers_test.go index f18905995..55859ee1a 100644 --- a/internal/monitoring/helpers_test.go +++ b/internal/monitoring/helpers_test.go @@ -205,6 +205,46 @@ func TestSafeFloat(t *testing.T) { } } +func TestMakeGuestID(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + instanceName string + clusterName string + isCluster bool + vmid int + want string + }{ + // Standalone nodes use instance name + {name: "standalone node", instanceName: "pve-host1", clusterName: "", isCluster: false, vmid: 100, want: "pve-host1-100"}, + {name: "standalone with empty cluster name", instanceName: "pve-standalone", clusterName: "", isCluster: false, vmid: 200, want: "pve-standalone-200"}, + {name: "non-cluster even with cluster name", instanceName: "pve-node", clusterName: "my-cluster", isCluster: false, vmid: 150, want: "pve-node-150"}, + + // Cluster nodes use cluster name + {name: "cluster node uses cluster name", instanceName: "pve-host1", clusterName: "my-cluster", isCluster: true, vmid: 100, want: "my-cluster-100"}, + {name: "different cluster node same cluster", instanceName: "pve-host2", clusterName: "my-cluster", isCluster: true, vmid: 100, want: "my-cluster-100"}, + {name: "cluster with different vmid", instanceName: "pve-host1", clusterName: "production", isCluster: true, vmid: 999, want: "production-999"}, + + // Edge case: isCluster true but no cluster name falls back to instance name + {name: "cluster flag but no name", instanceName: "pve-node", clusterName: "", isCluster: true, vmid: 300, want: "pve-node-300"}, + + // Edge case: special characters and spaces (shouldn't happen but test anyway) + {name: "cluster name with hyphen", instanceName: "pve-node1", clusterName: "my-prod-cluster", isCluster: true, vmid: 101, want: "my-prod-cluster-101"}, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + if got := makeGuestID(tc.instanceName, tc.clusterName, tc.isCluster, tc.vmid); got != tc.want { + t.Fatalf("makeGuestID(%q, %q, %v, %d) = %q, want %q", tc.instanceName, tc.clusterName, tc.isCluster, tc.vmid, got, tc.want) + } + }) + } +} + + func TestConvertPoolInfoToModel(t *testing.T) { t.Parallel() diff --git a/internal/monitoring/monitor.go b/internal/monitoring/monitor.go index a0277ecac..fb0ad550d 100644 --- a/internal/monitoring/monitor.go +++ b/internal/monitoring/monitor.go @@ -686,6 +686,22 @@ func safeFloat(val float64) float64 { return val } +// makeGuestID generates a stable guest ID that is cluster-aware. +// When the instance is part of a cluster, the cluster name is used as the primary identifier +// to prevent duplicate guests when multiple cluster nodes are configured as separate PVE instances. +// Format when in cluster: clusterName-VMID (e.g., "my-cluster-100") +// Format when standalone: instanceName-VMID (e.g., "pve-host1-100") +// This ensures VMs/containers are properly deduplicated across multiple agents in the same cluster. +func makeGuestID(instanceName string, clusterName string, isCluster bool, vmid int) string { + // Use cluster name as the identifier when the instance is part of a cluster + // This ensures guests are identified by their cluster, not by which node reported them + if isCluster && clusterName != "" { + return fmt.Sprintf("%s-%d", clusterName, vmid) + } + // For standalone nodes, use the instance name + return fmt.Sprintf("%s-%d", instanceName, vmid) +} + // parseDurationEnv parses a duration from an environment variable, returning defaultVal if not set or invalid func parseDurationEnv(key string, defaultVal time.Duration) time.Duration { val := os.Getenv(key) @@ -1925,43 +1941,27 @@ func (m *Monitor) ApplyHostReport(report agentshost.Report, tokenRecord *config. if m.hostTokenBindings == nil { m.hostTokenBindings = make(map[string]string) } - if boundID, exists := m.hostTokenBindings[tokenID]; exists && boundID != bindingID { - m.mu.Unlock() - - conflictingHost := "unknown" - for _, candidate := range existingHosts { - if candidate.TokenID == tokenID || candidate.ID == boundID { - conflictingHost = candidate.Hostname - if candidate.DisplayName != "" { - conflictingHost = candidate.DisplayName - } - break - } - } - - tokenHint := tokenHintFromRecord(tokenRecord) - if tokenHint != "" { - tokenHint = " (" + tokenHint + ")" - } - - log.Warn(). + // Bind tokens by hostname rather than agent ID. This allows: + // - Same host to reconnect after agent reinstall (agent ID changes but hostname doesn't) + // - Multiple hosts to use the same token (each hostname gets its own binding entry) + // - Prevents a stolen token from being used on a different hostname + bindingKey := fmt.Sprintf("%s:%s", tokenID, hostname) + if boundID, exists := m.hostTokenBindings[bindingKey]; exists && boundID != bindingID { + // Same token+hostname but different agent ID - this is a reinstall, allow it + log.Info(). Str("tokenID", tokenID). - Str("tokenHint", tokenHint). - Str("reportingAgentID", bindingID). - Str("boundAgentID", boundID). - Str("conflictingHost", conflictingHost). - Msg("Rejecting host report: token already bound to different agent") - - return models.Host{}, fmt.Errorf("API token%s is already in use by host %q (agent: %s). Generate a new token or set --agent-id before reusing it", tokenHint, conflictingHost, boundID) - } - - if _, exists := m.hostTokenBindings[tokenID]; !exists { - m.hostTokenBindings[tokenID] = bindingID + Str("hostname", hostname). + Str("oldAgentID", boundID). + Str("newAgentID", bindingID). + Msg("Host agent reinstalled - updating token binding") + m.hostTokenBindings[bindingKey] = bindingID + } else if !exists { + m.hostTokenBindings[bindingKey] = bindingID log.Debug(). Str("tokenID", tokenID). Str("agentID", bindingID). Str("hostname", hostname). - Msg("Bound host agent token to agent identity") + Msg("Bound host agent token to hostname") } m.mu.Unlock() } @@ -3536,8 +3536,109 @@ func (m *Monitor) getConfiguredHostIPs() []string { return ips } +// consolidateDuplicateClusters detects and merges duplicate cluster instances. +// When multiple PVE instances belong to the same Proxmox cluster (determined by ClusterName), +// they should be merged into a single instance with all endpoints combined. +// This prevents duplicate VMs/containers in the UI. +func (m *Monitor) consolidateDuplicateClusters() { + if m == nil || m.config == nil || len(m.config.PVEInstances) < 2 { + return + } + + // Group instances by cluster name + clusterGroups := make(map[string][]int) // clusterName -> indices of instances + for i, instance := range m.config.PVEInstances { + if instance.IsCluster && instance.ClusterName != "" { + clusterGroups[instance.ClusterName] = append(clusterGroups[instance.ClusterName], i) + } + } + + // Find clusters that have duplicates + var mergedAny bool + for clusterName, indices := range clusterGroups { + if len(indices) < 2 { + continue // No duplicates for this cluster + } + + log.Warn(). + Str("cluster", clusterName). + Int("duplicates", len(indices)). + Msg("Detected duplicate cluster instances - consolidating") + + // Keep the first instance and merge all others into it + primaryIdx := indices[0] + primary := &m.config.PVEInstances[primaryIdx] + + // Build a set of existing endpoint node names + existingEndpoints := make(map[string]bool) + for _, ep := range primary.ClusterEndpoints { + existingEndpoints[ep.NodeName] = true + } + + // Merge endpoints from all duplicate instances + for _, dupIdx := range indices[1:] { + duplicate := m.config.PVEInstances[dupIdx] + log.Info(). + Str("cluster", clusterName). + Str("primary", primary.Name). + Str("duplicate", duplicate.Name). + Msg("Merging duplicate cluster instance") + + for _, ep := range duplicate.ClusterEndpoints { + if !existingEndpoints[ep.NodeName] { + primary.ClusterEndpoints = append(primary.ClusterEndpoints, ep) + existingEndpoints[ep.NodeName] = true + log.Info(). + Str("cluster", clusterName). + Str("endpoint", ep.NodeName). + Msg("Added endpoint from duplicate instance") + } + } + } + + mergedAny = true + } + + if !mergedAny { + return + } + + // Remove duplicate instances (keeping only the primary for each cluster) + var newInstances []config.PVEInstance + seenClusters := make(map[string]bool) + + for _, instance := range m.config.PVEInstances { + if instance.IsCluster && instance.ClusterName != "" { + if seenClusters[instance.ClusterName] { + log.Info(). + Str("cluster", instance.ClusterName). + Str("instance", instance.Name). + Msg("Removing duplicate cluster instance") + continue // Skip duplicates + } + seenClusters[instance.ClusterName] = true + } + newInstances = append(newInstances, instance) + } + + m.config.PVEInstances = newInstances + + // Persist the consolidated configuration + if m.persistence != nil { + if err := m.persistence.SaveNodesConfig(m.config.PVEInstances, m.config.PBSInstances, m.config.PMGInstances); err != nil { + log.Error().Err(err).Msg("Failed to persist cluster consolidation") + } else { + log.Info().Msg("Persisted consolidated cluster configuration") + } + } +} + // Start begins the monitoring loop func (m *Monitor) Start(ctx context.Context, wsHub *websocket.Hub) { + // Consolidate any duplicate cluster instances before starting + // This fixes the case where multiple agents registered from the same cluster + m.consolidateDuplicateClusters() + pollingInterval := m.effectivePVEPollingInterval() log.Info(). Dur("pollingInterval", pollingInterval). @@ -5177,10 +5278,10 @@ func (m *Monitor) pollPVEInstance(ctx context.Context, instanceName string, clie pollErr = ctx.Err() return default: - // Always try the efficient cluster/resources endpoint first + // Always try the efficient cluster/resources endpoint first // This endpoint works on both clustered and standalone nodes // Testing confirmed it works on standalone nodes like pimox - useClusterEndpoint := m.pollVMsAndContainersEfficient(ctx, instanceName, client, nodeEffectiveStatus) + useClusterEndpoint := m.pollVMsAndContainersEfficient(ctx, instanceName, instanceCfg.ClusterName, instanceCfg.IsCluster, client, nodeEffectiveStatus) if !useClusterEndpoint { // Fall back to traditional polling only if cluster/resources not available @@ -5202,10 +5303,10 @@ func (m *Monitor) pollPVEInstance(ctx context.Context, instanceName string, clie // Use optimized parallel polling for better performance if instanceCfg.MonitorVMs { - m.pollVMsWithNodes(ctx, instanceName, client, nodes, nodeEffectiveStatus) + m.pollVMsWithNodes(ctx, instanceName, instanceCfg.ClusterName, instanceCfg.IsCluster, client, nodes, nodeEffectiveStatus) } if instanceCfg.MonitorContainers { - m.pollContainersWithNodes(ctx, instanceName, client, nodes, nodeEffectiveStatus) + m.pollContainersWithNodes(ctx, instanceName, instanceCfg.ClusterName, instanceCfg.IsCluster, client, nodes, nodeEffectiveStatus) } } } @@ -5666,8 +5767,14 @@ func (m *Monitor) pollPVEInstance(ctx context.Context, instanceName string, clie // pollVMsAndContainersEfficient uses the cluster/resources endpoint to get all VMs and containers in one call // This works on both clustered and standalone nodes for efficient polling -func (m *Monitor) pollVMsAndContainersEfficient(ctx context.Context, instanceName string, client PVEClientInterface, nodeEffectiveStatus map[string]string) bool { - log.Info().Str("instance", instanceName).Msg("Polling VMs and containers using efficient cluster/resources endpoint") +// When the instance is part of a cluster, the cluster name is used for guest IDs to prevent duplicates +// when multiple cluster nodes are configured as separate PVE instances. +func (m *Monitor) pollVMsAndContainersEfficient(ctx context.Context, instanceName string, clusterName string, isCluster bool, client PVEClientInterface, nodeEffectiveStatus map[string]string) bool { + log.Info(). + Str("instance", instanceName). + Str("clusterName", clusterName). + Bool("isCluster", isCluster). + Msg("Polling VMs and containers using efficient cluster/resources endpoint") // Get all resources in a single API call resources, err := client.GetClusterResources(ctx, "vm") @@ -5696,13 +5803,8 @@ func (m *Monitor) pollVMsAndContainersEfficient(ctx context.Context, instanceNam var allContainers []models.Container for _, res := range resources { - // Avoid duplicating node name in ID when instance name equals node name - var guestID string - if instanceName == res.Node { - guestID = fmt.Sprintf("%s-%d", res.Node, res.VMID) - } else { - guestID = fmt.Sprintf("%s-%s-%d", instanceName, res.Node, res.VMID) - } + // Use cluster-aware guest ID to prevent duplicates when multiple cluster nodes are configured + guestID := makeGuestID(instanceName, clusterName, isCluster, res.VMID) // Debug log the resource type log.Debug(). diff --git a/internal/monitoring/monitor_polling.go b/internal/monitoring/monitor_polling.go index 58a42bb41..b2c31b06e 100644 --- a/internal/monitoring/monitor_polling.go +++ b/internal/monitoring/monitor_polling.go @@ -185,7 +185,9 @@ func convertPoolInfoToModel(poolInfo *proxmox.ZFSPoolInfo) *models.ZFSPool { } // pollVMsWithNodes polls VMs from all nodes in parallel using goroutines -func (m *Monitor) pollVMsWithNodes(ctx context.Context, instanceName string, client PVEClientInterface, nodes []proxmox.Node, nodeEffectiveStatus map[string]string) { +// When the instance is part of a cluster, the cluster name is used for guest IDs to prevent duplicates +// when multiple cluster nodes are configured as separate PVE instances. +func (m *Monitor) pollVMsWithNodes(ctx context.Context, instanceName string, clusterName string, isCluster bool, client PVEClientInterface, nodes []proxmox.Node, nodeEffectiveStatus map[string]string) { startTime := time.Now() // Channel to collect VM results from each node @@ -253,9 +255,8 @@ func (m *Monitor) pollVMsWithNodes(ctx context.Context, instanceName string, cli tags = strings.Split(vm.Tags, ";") } - // Create guest ID (stable across node migrations) - // Format: instance-VMID - guestID := fmt.Sprintf("%s-%d", instanceName, vm.VMID) + // Use cluster-aware guest ID to prevent duplicates when multiple cluster nodes are configured + guestID := makeGuestID(instanceName, clusterName, isCluster, vm.VMID) guestRaw := VMMemoryRaw{ ListingMem: vm.Mem, @@ -873,7 +874,9 @@ func (m *Monitor) pollVMsWithNodes(ctx context.Context, instanceName string, cli } // pollContainersWithNodes polls containers from all nodes in parallel using goroutines -func (m *Monitor) pollContainersWithNodes(ctx context.Context, instanceName string, client PVEClientInterface, nodes []proxmox.Node, nodeEffectiveStatus map[string]string) { +// When the instance is part of a cluster, the cluster name is used for guest IDs to prevent duplicates +// when multiple cluster nodes are configured as separate PVE instances. +func (m *Monitor) pollContainersWithNodes(ctx context.Context, instanceName string, clusterName string, isCluster bool, client PVEClientInterface, nodes []proxmox.Node, nodeEffectiveStatus map[string]string) { startTime := time.Now() // Channel to collect container results from each node @@ -967,9 +970,8 @@ func (m *Monitor) pollContainersWithNodes(ctx context.Context, instanceName stri tags = strings.Split(container.Tags, ";") } - // Create guest ID (stable across node migrations) - // Format: instance-VMID - guestID := fmt.Sprintf("%s-%d", instanceName, container.VMID) + // Use cluster-aware guest ID to prevent duplicates when multiple cluster nodes are configured + guestID := makeGuestID(instanceName, clusterName, isCluster, int(container.VMID)) // Calculate I/O rates currentMetrics := IOMetrics{ @@ -1713,7 +1715,15 @@ func (m *Monitor) pollPVENode( } // Apply grace period for node status to prevent flapping - nodeID := instanceName + "-" + node.Node + // For clustered nodes, use clusterName-nodeName as the ID to deduplicate + // when the same cluster is registered via multiple entry points + // (e.g., agent installed with --enable-proxmox on multiple cluster nodes) + var nodeID string + if instanceCfg.IsCluster && instanceCfg.ClusterName != "" { + nodeID = instanceCfg.ClusterName + "-" + node.Node + } else { + nodeID = instanceName + "-" + node.Node + } effectiveStatus := node.Status now := time.Now()