mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 03:20:11 +00:00
Fix guest override migration for canonical IDs (#1334)
This commit is contained in:
parent
ab85c5a936
commit
b5ee2c1f98
4 changed files with 121 additions and 25 deletions
|
|
@ -9516,51 +9516,36 @@ func (m *Manager) LoadActiveAlerts() error {
|
|||
seen := make(map[string]bool)
|
||||
|
||||
for _, alert := range alerts {
|
||||
// Migrate legacy guest alert IDs (instance-node-VMID -> instance-VMID)
|
||||
// Migrate legacy guest alert IDs to the canonical guest format.
|
||||
// Check if this is a guest-related alert by looking at common alert types
|
||||
isGuestAlert := strings.Contains(alert.Type, "cpu") || strings.Contains(alert.Type, "memory") ||
|
||||
strings.Contains(alert.Type, "disk") || strings.Contains(alert.Type, "network") ||
|
||||
alert.Type == "guest-offline"
|
||||
if isGuestAlert {
|
||||
// Try to extract instance, node, and VMID from resource ID
|
||||
// Legacy format: instance-node-VMID or node-VMID (standalone)
|
||||
parts := strings.Split(alert.ResourceID, "-")
|
||||
|
||||
// Check if this looks like a legacy format (has node in the ID)
|
||||
// We can detect this if we have Node field and it appears in the ResourceID
|
||||
if alert.Node != "" && len(parts) >= 2 {
|
||||
var newResourceID string
|
||||
oldResourceID := alert.ResourceID
|
||||
|
||||
// Try to extract VMID (should be last part)
|
||||
vmidStr := parts[len(parts)-1]
|
||||
if _, err := strconv.Atoi(vmidStr); err == nil {
|
||||
// VMID is valid, now check if we need to migrate
|
||||
if len(parts) == 3 && alert.Instance != "" && alert.Instance != alert.Node {
|
||||
// Format: instance-node-VMID -> instance-VMID
|
||||
newResourceID = fmt.Sprintf("%s-%s", alert.Instance, vmidStr)
|
||||
} else if len(parts) == 2 && alert.Instance == alert.Node {
|
||||
// Format: node-VMID -> instance-VMID (standalone)
|
||||
newResourceID = fmt.Sprintf("%s-%s", alert.Instance, vmidStr)
|
||||
}
|
||||
vmid, _ := strconv.Atoi(vmidStr)
|
||||
newResourceID = BuildGuestKey(alert.Instance, alert.Node, vmid)
|
||||
|
||||
if newResourceID != "" && newResourceID != alert.ResourceID {
|
||||
if newResourceID != "" && newResourceID != oldResourceID {
|
||||
log.Info().
|
||||
Str("oldID", alert.ResourceID).
|
||||
Str("oldID", oldResourceID).
|
||||
Str("newID", newResourceID).
|
||||
Str("alertType", alert.Type).
|
||||
Msg("Migrating active alert from legacy guest ID format")
|
||||
|
||||
oldAlertID := alert.ID
|
||||
|
||||
// Update resource ID
|
||||
alert.ResourceID = newResourceID
|
||||
|
||||
// Update alert ID (usually contains resource ID)
|
||||
alert.ID = strings.Replace(alert.ID, alert.ResourceID, newResourceID, 1)
|
||||
|
||||
// Update seen map to use new ID
|
||||
seen[alert.ID] = true
|
||||
delete(seen, oldAlertID)
|
||||
alert.ID = strings.Replace(alert.ID, oldResourceID, newResourceID, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16831,6 +16831,62 @@ func TestLoadActiveAlerts(t *testing.T) {
|
|||
t.Errorf("first alert should win, got type %s", alert.Type)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("migrates legacy guest alert resource IDs to canonical format", func(t *testing.T) {
|
||||
m := newTestManager(t)
|
||||
m.ClearActiveAlerts()
|
||||
|
||||
alertsDir := filepath.Join(utils.GetDataDir(), "alerts")
|
||||
if err := os.MkdirAll(alertsDir, 0755); err != nil {
|
||||
t.Fatalf("failed to create alerts dir: %v", err)
|
||||
}
|
||||
|
||||
startTime := time.Now().Add(-10 * time.Minute)
|
||||
legacyResourceID := "pve1-100"
|
||||
canonicalResourceID := BuildGuestKey("pve1", "node1", 100)
|
||||
alerts := []Alert{
|
||||
{
|
||||
ID: legacyResourceID + "-cpu",
|
||||
Type: "cpu",
|
||||
Level: AlertLevelWarning,
|
||||
ResourceID: legacyResourceID,
|
||||
ResourceName: "test-vm",
|
||||
Node: "node1",
|
||||
Instance: "pve1",
|
||||
StartTime: startTime,
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
data, _ := json.Marshal(alerts)
|
||||
alertsFile := filepath.Join(alertsDir, "active-alerts.json")
|
||||
if err := os.WriteFile(alertsFile, data, 0644); err != nil {
|
||||
t.Fatalf("failed to write alerts json: %v", err)
|
||||
}
|
||||
|
||||
err := m.LoadActiveAlerts()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to load alerts: %v", err)
|
||||
}
|
||||
|
||||
m.mu.RLock()
|
||||
alert, exists := m.activeAlerts[canonicalResourceID+"-cpu"]
|
||||
_, oldExists := m.activeAlerts[legacyResourceID+"-cpu"]
|
||||
m.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Fatal("expected canonical guest alert to be loaded")
|
||||
}
|
||||
if oldExists {
|
||||
t.Fatal("expected legacy guest alert ID to be replaced")
|
||||
}
|
||||
if alert.ResourceID != canonicalResourceID {
|
||||
t.Fatalf("expected resource ID %q, got %q", canonicalResourceID, alert.ResourceID)
|
||||
}
|
||||
if alert.ID != canonicalResourceID+"-cpu" {
|
||||
t.Fatalf("expected alert ID %q, got %q", canonicalResourceID+"-cpu", alert.ID)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestNamespaceMatchesInstance(t *testing.T) {
|
||||
|
|
|
|||
|
|
@ -268,7 +268,7 @@ func (m *Manager) getGuestThresholds(guest interface{}, guestID string) Threshol
|
|||
}
|
||||
|
||||
// Finally check guest-specific overrides (highest priority)
|
||||
// First try the new stable ID format (instance-VMID)
|
||||
// First try the current canonical ID format (instance:node:vmid)
|
||||
override, exists := m.config.Overrides[guestID]
|
||||
|
||||
// If not found, try legacy ID formats for migration
|
||||
|
|
@ -331,8 +331,8 @@ func (m *Manager) getGuestThresholds(guest interface{}, guestID string) Threshol
|
|||
return thresholds
|
||||
}
|
||||
|
||||
// tryLegacyOverrideMigration attempts to find and migrate legacy override formats.
|
||||
// Returns the override and true if found, or zero value and false otherwise.
|
||||
// tryLegacyOverrideMigration attempts to find and migrate legacy override formats
|
||||
// into the canonical guest ID format (instance:node:vmid).
|
||||
func (m *Manager) tryLegacyOverrideMigration(guest interface{}, guestID string) (ThresholdConfig, bool) {
|
||||
var node string
|
||||
var vmid int
|
||||
|
|
@ -370,6 +370,22 @@ func (m *Manager) tryLegacyOverrideMigration(guest interface{}, guestID string)
|
|||
}
|
||||
}
|
||||
|
||||
// Try legacy clustered format: instance-VMID
|
||||
if instance != "" {
|
||||
legacyID := fmt.Sprintf("%s-%d", instance, vmid)
|
||||
if legacyOverride, legacyExists := m.config.Overrides[legacyID]; legacyExists {
|
||||
log.Info().
|
||||
Str("legacyID", legacyID).
|
||||
Str("newID", guestID).
|
||||
Msg("Migrating guest override from legacy cluster ID format")
|
||||
|
||||
m.config.Overrides[guestID] = legacyOverride
|
||||
delete(m.config.Overrides, legacyID)
|
||||
|
||||
return legacyOverride, true
|
||||
}
|
||||
}
|
||||
|
||||
// Try standalone format: node-VMID
|
||||
if instance == node {
|
||||
legacyID := fmt.Sprintf("%s-%d", node, vmid)
|
||||
|
|
|
|||
|
|
@ -1595,6 +1595,45 @@ func TestGetGuestThresholds(t *testing.T) {
|
|||
}
|
||||
})
|
||||
|
||||
t.Run("legacy ID migration for clustered instance-vmid format", func(t *testing.T) {
|
||||
canonicalID := BuildGuestKey("pve1", "node1", 100)
|
||||
m := &Manager{
|
||||
config: AlertConfig{
|
||||
GuestDefaults: ThresholdConfig{},
|
||||
Overrides: map[string]ThresholdConfig{
|
||||
// Legacy cluster format: instance-vmid
|
||||
"pve1-100": {
|
||||
CPU: &HysteresisThreshold{Trigger: 65, Clear: 60},
|
||||
},
|
||||
},
|
||||
CustomRules: []CustomAlertRule{},
|
||||
},
|
||||
}
|
||||
|
||||
vm := models.VM{
|
||||
Name: "test-vm",
|
||||
Node: "node1",
|
||||
Instance: "pve1",
|
||||
VMID: 100,
|
||||
}
|
||||
|
||||
result := m.getGuestThresholds(vm, canonicalID)
|
||||
|
||||
if result.CPU == nil {
|
||||
t.Fatal("CPU threshold should not be nil after cluster legacy migration")
|
||||
}
|
||||
if result.CPU.Trigger != 65 {
|
||||
t.Errorf("expected CPU trigger 65 from migrated cluster legacy override, got %v", result.CPU.Trigger)
|
||||
}
|
||||
|
||||
if _, exists := m.config.Overrides[canonicalID]; !exists {
|
||||
t.Error("override should be migrated to canonical guest ID")
|
||||
}
|
||||
if _, exists := m.config.Overrides["pve1-100"]; exists {
|
||||
t.Error("old cluster legacy override should be removed after migration")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("works with container type", func(t *testing.T) {
|
||||
m := &Manager{
|
||||
config: AlertConfig{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue