Mirror of https://github.com/rcourtman/Pulse.git (synced 2026-04-28 03:20:11 +00:00)

Fix template backup orphan detection race (#1352)

parent b9c6f504d8
commit 40249947ed

6 changed files with 151 additions and 51 deletions
@@ -5562,6 +5562,7 @@ func (m *Manager) CheckBackups(
     pmgBackups []models.PMGBackup,
     guestsByKey map[string]GuestLookup,
     guestsByVMID map[string][]GuestLookup,
+    templateInventoryReady map[string]bool,
 ) {
     m.mu.RLock()
     enabled := m.config.Enabled
@@ -5763,16 +5764,24 @@ func (m *Manager) CheckBackups(
         return
     }

-    // Build a set of instances that have at least one live guest (non-empty
-    // ResourceID). Orphan detection is only safe for instances whose guest
-    // list has been populated — if an instance hasn't been polled yet
-    // (startup race, auth failure, staggered polling), every backup from
-    // that instance would look orphaned.
-    instancesWithLiveGuests := make(map[string]bool)
-    for _, guests := range guestsByVMID {
-        for _, g := range guests {
-            if g.ResourceID != "" && g.Instance != "" {
-                instancesWithLiveGuests[g.Instance] = true
-            }
-        }
-    }
+    // Build a set of instances whose inventory is safe to use for orphan detection.
+    // When the monitor provides a template-inventory readiness map, prefer that
+    // signal because backup polling can race ahead of template discovery even when
+    // live guests already exist on the instance. Fall back to the legacy "has at
+    // least one live guest" heuristic for direct callers/tests that do not pass it.
+    instancesReadyForOrphanDetection := make(map[string]bool)
+    if templateInventoryReady != nil {
+        for instance, ready := range templateInventoryReady {
+            if ready {
+                instancesReadyForOrphanDetection[instance] = true
+            }
+        }
+    } else {
+        for _, guests := range guestsByVMID {
+            for _, g := range guests {
+                if g.ResourceID != "" && g.Instance != "" {
+                    instancesReadyForOrphanDetection[g.Instance] = true
+                }
+            }
+        }
+    }
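The heart of this hunk is the nil-versus-empty distinction on the new parameter: a nil map means the caller supplied no readiness signal, while an empty map means the signal was supplied and nothing is ready yet. A minimal standalone sketch of the selection above (the package, the trimmed-down GuestLookup, and the helper name readyInstances are illustrative, not part of the commit):

package sketch

// GuestLookup mirrors just the fields the selection logic reads.
type GuestLookup struct {
    ResourceID string
    Instance   string
}

// readyInstances is a hypothetical extraction of the logic in the hunk above.
func readyInstances(templateInventoryReady map[string]bool, guestsByVMID map[string][]GuestLookup) map[string]bool {
    ready := make(map[string]bool)
    if templateInventoryReady != nil {
        // Preferred signal: an instance is safe for orphan detection only
        // after a successful template-aware inventory poll.
        for instance, ok := range templateInventoryReady {
            if ok {
                ready[instance] = true
            }
        }
        return ready
    }
    // Legacy fallback for callers that pass nil: treat any instance with at
    // least one live guest (non-empty ResourceID) as polled.
    for _, guests := range guestsByVMID {
        for _, g := range guests {
            if g.ResourceID != "" && g.Instance != "" {
                ready[g.Instance] = true
            }
        }
    }
    return ready
}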
@@ -5812,15 +5821,15 @@ func (m *Manager) CheckBackups(
             continue
         }
         // Determine whether we have enough inventory to safely run orphan
         // detection for this backup. For PVE storage backups the instance
         // guard is strict: only check when that specific PVE instance has
-        // been polled. For PBS/PMG backups (which span instances) it's
-        // enough that *any* instance has live guests.
+        // completed a template-aware inventory poll. For PBS/PMG backups
+        // (which span instances) it's enough that any instance is ready.
         inventoryReady := false
         if record.source == "PVE storage" {
-            inventoryReady = instancesWithLiveGuests[record.instance]
+            inventoryReady = instancesReadyForOrphanDetection[record.instance]
         } else {
-            inventoryReady = len(instancesWithLiveGuests) > 0
+            inventoryReady = len(instancesReadyForOrphanDetection) > 0
         }
         if record.vmid != "" && record.lookup.ResourceID == "" && inventoryReady {
             // Backup has a VMID but no matching live guest in its lookup.
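The gate differs by backup source for a reason the comment compresses: a PVE storage backup records which instance it came from, so it can be gated on that instance's readiness alone, whereas PBS and PMG backups aggregate guests from many instances, so any ready instance makes the cross-instance lookup meaningful. A sketch of the decision, under the same illustrative assumptions as the previous sketch:

package sketch

// inventoryReadyFor is a hypothetical reduction of the per-backup gate above.
func inventoryReadyFor(source, instance string, ready map[string]bool) bool {
    if source == "PVE storage" {
        // Strict gate: this exact PVE instance must have completed a
        // template-aware inventory poll.
        return ready[instance]
    }
    // PBS/PMG backups span instances: one ready instance is enough.
    return len(ready) > 0
}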
@@ -6157,9 +6166,9 @@ func (m *Manager) CheckBackups(
         if _, ok := validAlerts[alertID]; ok {
             continue
         }
-        // When no instances have live inventory, preserve existing orphan
+        // When no instances have inventory ready for orphan detection, preserve existing orphan
         // alerts rather than clearing them — we can't confirm they're resolved.
-        if len(instancesWithLiveGuests) == 0 && alert.Type == "backup-orphaned" {
+        if len(instancesReadyForOrphanDetection) == 0 && alert.Type == "backup-orphaned" {
            continue
         }
         m.clearAlertNoLock(alertID)
@@ -976,7 +976,7 @@ func TestCheckBackupsCreatesAndClearsAlerts(t *testing.T) {
         "100": {guestsByKey[key]},
     }

-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     alert, exists := m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
@@ -990,7 +990,7 @@ func TestCheckBackupsCreatesAndClearsAlerts(t *testing.T) {

     // Recent backup clears alert
     storageBackups[0].Time = now
-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     _, exists = m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
@@ -1044,7 +1044,7 @@ func TestCheckBackupsRespectsOverrides(t *testing.T) {
     }

     // 1. Verify warning alert is created with defaults
-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
     m.mu.RLock()
     alert, exists := m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
     m.mu.RUnlock()
@@ -1064,7 +1064,7 @@ func TestCheckBackupsRespectsOverrides(t *testing.T) {
     }
     m.UpdateConfig(cfg)

-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
     m.mu.RLock()
     _, exists = m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
     m.mu.RUnlock()
@@ -1081,7 +1081,7 @@ func TestCheckBackupsRespectsOverrides(t *testing.T) {
         },
     }
     m.UpdateConfig(cfg)
-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
     m.mu.RLock()
     _, exists = m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
     m.mu.RUnlock()
@@ -1095,7 +1095,7 @@ func TestCheckBackupsRespectsOverrides(t *testing.T) {
     }
     m.UpdateConfig(cfg)
     storageBackups[0].Time = now.Add(-30 * 24 * time.Hour) // Way past defaults
-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
     m.mu.RLock()
     _, exists = m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
     m.mu.RUnlock()
@@ -1138,7 +1138,7 @@ func TestCheckBackupsHandlesPbsOnlyGuests(t *testing.T) {
         "9999": {guestsByKey[sentinelKey]},
     }

-    m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     found := false
@@ -1217,7 +1217,7 @@ func TestCheckBackupsDisambiguatesWithNamespace(t *testing.T) {
         },
     }

-    m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1299,7 +1299,7 @@ func TestCheckBackupsVMIDCollisionNonMatchingNamespace(t *testing.T) {
         },
     }

-    m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1378,7 +1378,7 @@ func TestCheckBackupsVMIDCollisionNoNamespace(t *testing.T) {
         },
     }

-    m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1426,7 +1426,7 @@ func TestCheckBackupsHandlesPmgBackups(t *testing.T) {
         },
     }

-    m.CheckBackups(nil, nil, pmgBackups, map[string]GuestLookup{}, map[string][]GuestLookup{})
+    m.CheckBackups(nil, nil, pmgBackups, map[string]GuestLookup{}, map[string][]GuestLookup{}, nil)

     m.mu.RLock()
     found := false
@@ -1483,7 +1483,7 @@ func TestCheckBackupsSkipsOrphanedWhenDisabled(t *testing.T) {
         "9999": {guestsByKey[sentinelKey]},
     }

-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1537,7 +1537,7 @@ func TestCheckBackupsCreatesOrphanedAlert(t *testing.T) {
         "9999": {guestsByKey[sentinelKey]},
     }

-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1609,7 +1609,7 @@ func TestCheckBackupsOrphanedAlertClearsWhenGuestReappears(t *testing.T) {
     }

     // First cycle: guest 300 absent (only sentinel present) → orphaned alert fires.
-    m.CheckBackups(storageBackups, nil, nil, sentinelByKey, sentinelByVMID)
+    m.CheckBackups(storageBackups, nil, nil, sentinelByKey, sentinelByVMID, nil)

     m.mu.RLock()
     orphanedFound := false
@@ -1631,7 +1631,7 @@ func TestCheckBackupsOrphanedAlertClearsWhenGuestReappears(t *testing.T) {
     guestsByVMID := map[string][]GuestLookup{
         "300": {guestsByKey[guestKey]},
     }
-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1690,7 +1690,7 @@ func TestCheckBackupsOrphanedIgnoresVMIDs(t *testing.T) {
     }

     // Both are orphaned (not in inventory), but VMID 200 matches ignore pattern "20*".
-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1749,7 +1749,7 @@ func TestCheckBackupsOrphanedWithZeroAgeThresholds(t *testing.T) {
     }

     // Orphaned guest with zero age thresholds — should still fire orphaned alert.
-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1815,7 +1815,7 @@ func TestCheckBackupsOrphanedWithPersistedMetadata(t *testing.T) {
         "9999": {guestsByKey[sentinelKey]},
     }

-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1863,7 +1863,7 @@ func TestCheckBackupsOrphanedSkippedWhenNoLiveInventory(t *testing.T) {
     }

     // Completely empty guest maps — no live inventory.
-    m.CheckBackups(storageBackups, nil, nil, map[string]GuestLookup{}, map[string][]GuestLookup{})
+    m.CheckBackups(storageBackups, nil, nil, map[string]GuestLookup{}, map[string][]GuestLookup{}, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1914,7 +1914,7 @@ func TestCheckBackupsOrphanedPreservedWhenNoLiveInventory(t *testing.T) {
     guestsByVMID := map[string][]GuestLookup{
         "9999": {guestsByKey[sentinelKey]},
     }
-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     orphanFound := false
@@ -1929,7 +1929,7 @@ func TestCheckBackupsOrphanedPreservedWhenNoLiveInventory(t *testing.T) {
     }

    // Second cycle: inventory disappears (empty maps) — orphan alert must be preserved.
-    m.CheckBackups(storageBackups, nil, nil, map[string]GuestLookup{}, map[string][]GuestLookup{})
+    m.CheckBackups(storageBackups, nil, nil, map[string]GuestLookup{}, map[string][]GuestLookup{}, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -1990,7 +1990,7 @@ func TestCheckBackupsOrphanedCrossInstanceVMID(t *testing.T) {
         "9999": {guestsByKey[sentinelA]},
     }

-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     defer m.mu.RUnlock()
@@ -2005,6 +2005,64 @@ func TestCheckBackupsOrphanedCrossInstanceVMID(t *testing.T) {
     }
 }

+func TestCheckBackupsSkipsPVEOrphanDetectionUntilTemplateInventoryReady(t *testing.T) {
+    m := newTestManager(t)
+    m.ClearActiveAlerts()
+
+    alertOrphaned := true
+    m.mu.Lock()
+    m.config.Enabled = true
+    m.config.BackupDefaults = BackupAlertConfig{
+        Enabled: true,
+        WarningDays: 7,
+        CriticalDays: 14,
+        AlertOrphaned: &alertOrphaned,
+        IgnoreVMIDs: []string{},
+    }
+    m.mu.Unlock()
+
+    now := time.Now()
+    storageBackups := []models.StorageBackup{
+        {
+            ID: "instA-node2-700-backup",
+            Storage: "local",
+            Node: "node2",
+            Instance: "instA",
+            Type: "qemu",
+            VMID: 700,
+            Time: now.Add(-1 * 24 * time.Hour),
+        },
+    }
+
+    // Simulate the startup/concurrency window where the instance has enough live
+    // guest data to satisfy the legacy readiness heuristic, but template inventory
+    // has not been populated yet. This backup must not be treated as orphaned.
+    sentinelKey := BuildGuestKey("instA", "node3", 9999)
+    guestsByKey := map[string]GuestLookup{
+        sentinelKey: {
+            ResourceID: "qemu/9999",
+            Name: "sentinel-vm",
+            Instance: "instA",
+            Node: "node3",
+            Type: "qemu",
+            VMID: 9999,
+        },
+    }
+    guestsByVMID := map[string][]GuestLookup{
+        "9999": {guestsByKey[sentinelKey]},
+    }
+
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, map[string]bool{})
+
+    m.mu.RLock()
+    defer m.mu.RUnlock()
+    for id := range m.activeAlerts {
+        if strings.HasPrefix(id, "backup-orphaned-") {
+            t.Fatalf("expected no orphaned alert before template inventory is ready, found %s", id)
+        }
+    }
+}
+
 func TestCheckBackupsIgnoresVMIDs(t *testing.T) {
     m := newTestManager(t)
     m.ClearActiveAlerts()
@@ -2054,7 +2112,7 @@ func TestCheckBackupsIgnoresVMIDs(t *testing.T) {
         "200": {guestsByKey[keyAllowed]},
     }

-    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID)
+    m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

     m.mu.RLock()
     _, ignoredExists := m.activeAlerts["backup-age-"+sanitizeAlertKey(keyIgnored)]
@@ -41,6 +41,8 @@ type State struct {
     TemperatureMonitoringEnabled bool `json:"temperatureMonitoringEnabled"`
     PVETagColors map[string]string `json:"pveTagColors,omitempty"`
     // templateVMIDs tracks Proxmox template VMIDs (not shown in UI, used for backup orphan detection).
+    // The presence of an instance key also acts as the readiness signal that a successful
+    // resources poll populated template inventory for that instance, even if the map is empty.
     // Unexported so it is never serialised to JSON or the API.
     templateVMIDs map[string]map[int]string // instance -> VMID -> node
 }
@@ -1056,7 +1058,7 @@ type Memory struct {
     Used int64 `json:"used"`
     Free int64 `json:"free"`
     Usage float64 `json:"usage"`
-    Cache int64 `json:"cache,omitempty"` // Reclaimable buff/cache (Available - Free); used + cache + free ≈ total
+    Cache int64 `json:"cache,omitempty"` // Reclaimable buff/cache (Available - Free); used + cache + free ≈ total
     Balloon int64 `json:"balloon,omitempty"`
     SwapUsed int64 `json:"swapUsed,omitempty"`
     SwapTotal int64 `json:"swapTotal,omitempty"`
@@ -3068,14 +3070,16 @@ func (s *State) GetContainers() []Container {
 // UpdateTemplateVMIDsForInstance stores the VMID→node mapping for Proxmox template guests
 // for a given instance. Templates are excluded from the main VM/container lists (they are not
 // monitored) but must be known to the backup orphan-detection logic so their backups are not
-// incorrectly flagged as orphaned. Pass nil or an empty map to clear the instance's entries.
+// incorrectly flagged as orphaned. Pass nil to clear the instance's entries entirely.
+// Passing an empty non-nil map records that inventory was populated and that the
+// instance currently has no templates.
 func (s *State) UpdateTemplateVMIDsForInstance(instance string, vmids map[int]string) {
     s.mu.Lock()
     defer s.mu.Unlock()
     if s.templateVMIDs == nil {
         s.templateVMIDs = make(map[string]map[int]string)
     }
-    if len(vmids) == 0 {
+    if vmids == nil {
         delete(s.templateVMIDs, instance)
     } else {
         s.templateVMIDs[instance] = vmids
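The nil/empty split gives callers three distinct moves. A hypothetical usage sketch (the instance name, VMID, and node are placeholders, not values from the commit):

// Poll succeeded and found templates: instance becomes ready for orphan detection.
state.UpdateTemplateVMIDsForInstance("pve1", map[int]string{900: "node1"})

// Poll succeeded but the instance has no templates: still marked ready.
state.UpdateTemplateVMIDsForInstance("pve1", map[int]string{})

// Poll failed or the instance was removed: readiness is cleared.
state.UpdateTemplateVMIDsForInstance("pve1", nil)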
@@ -186,6 +186,31 @@ func TestStateLinkHostAgentToNode(t *testing.T) {
     }
 }

+func TestStateSnapshotPreservesEmptyTemplateInventoryReadiness(t *testing.T) {
+    state := &State{}
+
+    state.UpdateTemplateVMIDsForInstance("instA", map[int]string{})
+
+    snapshot := state.GetSnapshot()
+    if snapshot.TemplateVMIDs == nil {
+        t.Fatalf("expected TemplateVMIDs map to be initialized in snapshot")
+    }
+
+    templates, exists := snapshot.TemplateVMIDs["instA"]
+    if !exists {
+        t.Fatalf("expected empty template inventory for instA to be preserved as readiness signal")
+    }
+    if len(templates) != 0 {
+        t.Fatalf("expected no template VMIDs for instA, got %v", templates)
+    }
+
+    state.UpdateTemplateVMIDsForInstance("instA", nil)
+    snapshot = state.GetSnapshot()
+    if _, exists := snapshot.TemplateVMIDs["instA"]; exists {
+        t.Fatalf("expected nil update to clear template inventory for instA")
+    }
+}
+
 func TestUpdateNodesForInstancePreservesLinkWhenNodeIDChanges(t *testing.T) {
     state := &State{
         Hosts: []Host{
|||
|
|
@ -33,6 +33,8 @@ type StateSnapshot struct {
|
|||
TemperatureMonitoringEnabled bool `json:"temperatureMonitoringEnabled"`
|
||||
PVETagColors map[string]string `json:"pveTagColors,omitempty"`
|
||||
// TemplateVMIDs holds Proxmox template VMID→node mappings per instance.
|
||||
// The presence of an instance key also indicates that template inventory was
|
||||
// successfully populated for that instance, even when the inner map is empty.
|
||||
// Not serialised to JSON or the frontend API; used only for backup orphan detection.
|
||||
TemplateVMIDs map[string]map[int]string `json:"-"`
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -10205,7 +10205,7 @@ func (m *Monitor) pollStorageBackupsWithNodes(ctx context.Context, instanceName

     if m.alertManager != nil {
         snapshot := m.state.GetSnapshot()
-        guestsByKey, guestsByVMID := buildGuestLookups(snapshot, m.guestMetadataStore)
+        guestsByKey, guestsByVMID, templateInventoryReady := buildGuestLookups(snapshot, m.guestMetadataStore)
         pveStorage := snapshot.Backups.PVE.StorageBackups
         if len(pveStorage) == 0 && len(snapshot.PVEBackups.StorageBackups) > 0 {
             pveStorage = snapshot.PVEBackups.StorageBackups
@@ -10218,7 +10218,7 @@ func (m *Monitor) pollStorageBackupsWithNodes(ctx context.Context, instanceName
         if len(pmgBackups) == 0 && len(snapshot.PMGBackups) > 0 {
             pmgBackups = snapshot.PMGBackups
         }
-        m.alertManager.CheckBackups(pveStorage, pbsBackups, pmgBackups, guestsByKey, guestsByVMID)
+        m.alertManager.CheckBackups(pveStorage, pbsBackups, pmgBackups, guestsByKey, guestsByVMID, templateInventoryReady)
     }

     // Clear permission warning if no permission errors occurred this cycle
@@ -10361,9 +10361,10 @@ func preserveFailedStorageBackups(instanceName string, snapshot models.StateSnap
     return current, storages
 }

-func buildGuestLookups(snapshot models.StateSnapshot, metadataStore *config.GuestMetadataStore) (map[string]alerts.GuestLookup, map[string][]alerts.GuestLookup) {
+func buildGuestLookups(snapshot models.StateSnapshot, metadataStore *config.GuestMetadataStore) (map[string]alerts.GuestLookup, map[string][]alerts.GuestLookup, map[string]bool) {
     byKey := make(map[string]alerts.GuestLookup)
     byVMID := make(map[string][]alerts.GuestLookup)
+    templateInventoryReady := make(map[string]bool)

     for _, vm := range snapshot.VMs {
         info := alerts.GuestLookup{
@@ -10418,6 +10419,7 @@ func buildGuestLookups(snapshot models.StateSnapshot, metadataStore *config.Gues
     // existing templates as orphaned. Templates are excluded from the main VM/container
     // lists (they are not monitored), so without this they would appear as unknown guests.
     for instance, vmids := range snapshot.TemplateVMIDs {
+        templateInventoryReady[instance] = true
         for vmid, node := range vmids {
             info := alerts.GuestLookup{
                 ResourceID: makeGuestID(instance, node, vmid),
@@ -10434,7 +10436,7 @@ func buildGuestLookups(snapshot models.StateSnapshot, metadataStore *config.Gues
         }
     }

-    return byKey, byVMID
+    return byKey, byVMID, templateInventoryReady

 }

 // enrichWithPersistedMetadata adds entries from the metadata store for guests
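Readiness falls out of key presence alone: every instance that appears in snapshot.TemplateVMIDs, even with an empty inner map, is marked ready, which is exactly the state that UpdateTemplateVMIDsForInstance preserves. A reduced sketch of that derivation (the package and helper name are illustrative):

package sketch

// readinessFromSnapshot is a hypothetical reduction of the loop above.
func readinessFromSnapshot(templateVMIDs map[string]map[int]string) map[string]bool {
    ready := make(map[string]bool)
    for instance := range templateVMIDs {
        // Key presence signals a completed template-aware poll; an empty
        // inner map means "polled, found no templates".
        ready[instance] = true
    }
    return ready
}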
@@ -11282,7 +11284,7 @@ func (m *Monitor) pollPBSBackups(ctx context.Context, instanceName string, clien

     if m.alertManager != nil {
         snapshot := m.state.GetSnapshot()
-        guestsByKey, guestsByVMID := buildGuestLookups(snapshot, m.guestMetadataStore)
+        guestsByKey, guestsByVMID, templateInventoryReady := buildGuestLookups(snapshot, m.guestMetadataStore)
         pveStorage := snapshot.Backups.PVE.StorageBackups
         if len(pveStorage) == 0 && len(snapshot.PVEBackups.StorageBackups) > 0 {
             pveStorage = snapshot.PVEBackups.StorageBackups
@@ -11295,7 +11297,7 @@ func (m *Monitor) pollPBSBackups(ctx context.Context, instanceName string, clien
         if len(pmgBackups) == 0 && len(snapshot.PMGBackups) > 0 {
             pmgBackups = snapshot.PMGBackups
         }
-        m.alertManager.CheckBackups(pveStorage, pbsBackups, pmgBackups, guestsByKey, guestsByVMID)
+        m.alertManager.CheckBackups(pveStorage, pbsBackups, pmgBackups, guestsByKey, guestsByVMID, templateInventoryReady)
     }

     // Immediately broadcast the updated state so frontend sees new backups
@@ -11547,7 +11549,7 @@ func (m *Monitor) checkMockAlerts() {
             Msg("Collecting resources for alert cleanup in mock mode")
         m.alertManager.CleanupAlertsForNodes(existingNodes)

-        guestsByKey, guestsByVMID := buildGuestLookups(state, m.guestMetadataStore)
+        guestsByKey, guestsByVMID, templateInventoryReady := buildGuestLookups(state, m.guestMetadataStore)
         pveStorage := state.Backups.PVE.StorageBackups
         if len(pveStorage) == 0 && len(state.PVEBackups.StorageBackups) > 0 {
             pveStorage = state.PVEBackups.StorageBackups
@@ -11560,7 +11562,7 @@ func (m *Monitor) checkMockAlerts() {
         if len(pmgBackups) == 0 && len(state.PMGBackups) > 0 {
             pmgBackups = state.PMGBackups
         }
-        m.alertManager.CheckBackups(pveStorage, pbsBackups, pmgBackups, guestsByKey, guestsByVMID)
+        m.alertManager.CheckBackups(pveStorage, pbsBackups, pmgBackups, guestsByKey, guestsByVMID, templateInventoryReady)

         // Limit how many guests we check per cycle to prevent blocking with large datasets
         const maxGuestsPerCycle = 50