Mirror of https://github.com/rcourtman/Pulse.git, synced 2026-04-26 10:31:17 +00:00
Debounce recovery for poll-driven offline alerts

parent 005f64182f
commit a86c7120cf

2 changed files with 310 additions and 7 deletions
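What the change does: once a poll-driven offline alert is active, a single healthy poll no longer resolves it. The manager now counts consecutive healthy polls per alert ID and clears the alert (and fires the resolved callback) only after a per-resource confirmation threshold is met; any poll that sees the resource offline again resets the count. The diff below touches the alert manager first, then its tests.

As orientation, here is a minimal, self-contained sketch of that debounce pattern. The names (recoveryDebouncer, confirm, reset) are illustrative, not the repository's API; the real implementation is the confirmOfflineRecoveryNoLock helper shown in the diff.

    package main

    import "fmt"

    // recoveryDebouncer is a stand-in for the manager's
    // offlineRecoveryConfirmations map keyed by alert ID.
    type recoveryDebouncer struct {
        confirmations map[string]int
    }

    // confirm records one healthy poll for alertID and reports whether the
    // alert may now be resolved; required is the per-resource threshold.
    func (d *recoveryDebouncer) confirm(alertID string, required int) bool {
        if required <= 1 {
            delete(d.confirmations, alertID) // no debounce requested
            return true
        }
        d.confirmations[alertID]++
        if d.confirmations[alertID] < required {
            return false // keep the alert active; wait for the next poll
        }
        delete(d.confirmations, alertID) // confirmed; stop tracking
        return true
    }

    // reset drops partial recovery progress when the resource is seen offline
    // again, so a flapping resource never accumulates confirmations.
    func (d *recoveryDebouncer) reset(alertID string) {
        delete(d.confirmations, alertID)
    }

    func main() {
        d := &recoveryDebouncer{confirmations: map[string]int{}}
        fmt.Println(d.confirm("node-offline-node1", 3)) // false (1 of 3)
        fmt.Println(d.confirm("node-offline-node1", 3)) // false (2 of 3)
        d.reset("node-offline-node1")                   // node seen offline again
        fmt.Println(d.confirm("node-offline-node1", 3)) // false (back to 1 of 3)
        fmt.Println(d.confirm("node-offline-node1", 3)) // false (2 of 3)
        fmt.Println(d.confirm("node-offline-node1", 3)) // true (confirmed)
    }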
@@ -519,13 +519,14 @@ type Manager struct {
     // Time threshold tracking
     pendingAlerts map[string]time.Time // Track when thresholds were first exceeded
     // Offline confirmation tracking
-    nodeOfflineCount      map[string]int                  // Track consecutive offline counts for nodes (legacy)
-    offlineConfirmations  map[string]int                  // Track consecutive offline counts for all resources
-    dockerOfflineCount    map[string]int                  // Track consecutive offline counts for Docker hosts
-    dockerStateConfirm    map[string]int                  // Track consecutive state confirmations for Docker containers
-    dockerRestartTracking map[string]*dockerRestartRecord // Track restart counts and times for restart loop detection
-    dockerLastExitCode    map[string]int                  // Track last exit code for OOM detection
-    dockerUpdateFirstSeen map[string]time.Time            // Track when image updates were first detected for alert delay
+    nodeOfflineCount             map[string]int                  // Track consecutive offline counts for nodes (legacy)
+    offlineConfirmations         map[string]int                  // Track consecutive offline counts for all resources
+    offlineRecoveryConfirmations map[string]int                  // Track consecutive healthy confirmations before clearing poll-driven offline alerts
+    dockerOfflineCount           map[string]int                  // Track consecutive offline counts for Docker hosts
+    dockerStateConfirm           map[string]int                  // Track consecutive state confirmations for Docker containers
+    dockerRestartTracking        map[string]*dockerRestartRecord // Track restart counts and times for restart loop detection
+    dockerLastExitCode           map[string]int                  // Track last exit code for OOM detection
+    dockerUpdateFirstSeen        map[string]time.Time            // Track when image updates were first detected for alert delay
     // Stable identity tracking prevents update-delay resets when host IDs churn.
     dockerUpdateFirstSeenByIdentity map[string]time.Time
     // PMG quarantine growth tracking
@@ -591,6 +592,7 @@ func NewManagerWithDataDir(dataDir string) *Manager {
         pendingAlerts:                make(map[string]time.Time),
         nodeOfflineCount:             make(map[string]int),
         offlineConfirmations:         make(map[string]int),
+        offlineRecoveryConfirmations: make(map[string]int),
         dockerOfflineCount:           make(map[string]int),
         dockerStateConfirm:           make(map[string]int),
         dockerRestartTracking:        make(map[string]*dockerRestartRecord),
@@ -7496,6 +7498,7 @@ func (m *Manager) removeActiveAlertNoLock(alertID string) {
         m.historyManager.UpdateAlertLastSeen(alertID, alert.LastSeen)
     }
     delete(m.activeAlerts, alertID)
+    delete(m.offlineRecoveryConfirmations, alertID)
     // NOTE: Don't delete ackState here - preserve it so if the same alert
     // reappears (e.g., powered-off VM during backup), the acknowledgement
     // is restored via preserveAlertState. ackState is cleaned up in Cleanup().
@@ -7506,6 +7509,22 @@ func (m *Manager) removeActiveAlertNoLock(alertID string) {
     }
 }
 
+func (m *Manager) confirmOfflineRecoveryNoLock(alertID string, required int) (int, bool) {
+    if required <= 1 {
+        delete(m.offlineRecoveryConfirmations, alertID)
+        return required, true
+    }
+
+    m.offlineRecoveryConfirmations[alertID]++
+    confirmations := m.offlineRecoveryConfirmations[alertID]
+    if confirmations < required {
+        return confirmations, false
+    }
+
+    delete(m.offlineRecoveryConfirmations, alertID)
+    return confirmations, true
+}
+
 // GetActiveAlerts returns all active alerts
 func (m *Manager) GetActiveAlerts() []Alert {
     m.mu.RLock()
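A note on the new helper's semantics: for required > 1 it increments the per-alert counter and returns (count, false) until the count reaches required, at which point it deletes the counter and returns (count, true); required <= 1 short-circuits to an immediate confirmation. The NoLock suffix follows the file's existing convention, so callers must already hold m.mu. The added delete in removeActiveAlertNoLock above keeps a stale partial count from leaking into a future alert that reuses the same ID.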
@@ -7628,6 +7647,7 @@ func (m *Manager) checkNodeOffline(node models.Node) {
     defer m.mu.Unlock()
 
     // Check if node connectivity alerts are disabled
+    delete(m.offlineRecoveryConfirmations, alertID)
     if override, exists := m.config.Overrides[node.ID]; exists && override.DisableConnectivity {
         // Node connectivity alerts are disabled, clear any existing alert and return
         if _, alertExists := m.activeAlerts[alertID]; alertExists {
@@ -7734,6 +7754,18 @@ func (m *Manager) clearNodeOfflineAlert(node models.Node) {
     // Check if offline alert exists
     alert, exists := m.activeAlerts[alertID]
     if !exists {
+        delete(m.offlineRecoveryConfirmations, alertID)
         return
     }
 
+    const requiredRecoveryCount = 3
+    recoveryCount, confirmed := m.confirmOfflineRecoveryNoLock(alertID, requiredRecoveryCount)
+    if !confirmed {
+        log.Debug().
+            Str("node", node.Name).
+            Int("confirmations", recoveryCount).
+            Int("required", requiredRecoveryCount).
+            Msg("Node appears back online, waiting for recovery confirmation")
+        return
+    }
+
@@ -7765,6 +7797,7 @@ func (m *Manager) checkPBSOffline(pbs models.PBSInstance) {
     defer m.mu.Unlock()
 
     // Check if PBS offline alerts are disabled via disableConnectivity flag
+    delete(m.offlineRecoveryConfirmations, alertID)
     if override, exists := m.config.Overrides[pbs.ID]; exists && (override.Disabled || override.DisableConnectivity) {
         // PBS connectivity alerts are disabled, clear any existing alert and return
         if _, alertExists := m.activeAlerts[alertID]; alertExists {
@@ -7852,6 +7885,18 @@ func (m *Manager) clearPBSOfflineAlert(pbs models.PBSInstance) {
     // Check if offline alert exists
     alert, exists := m.activeAlerts[alertID]
     if !exists {
+        delete(m.offlineRecoveryConfirmations, alertID)
         return
     }
 
+    const requiredRecoveryCount = 3
+    recoveryCount, confirmed := m.confirmOfflineRecoveryNoLock(alertID, requiredRecoveryCount)
+    if !confirmed {
+        log.Debug().
+            Str("pbs", pbs.Name).
+            Int("confirmations", recoveryCount).
+            Int("required", requiredRecoveryCount).
+            Msg("PBS appears back online, waiting for recovery confirmation")
+        return
+    }
+
@@ -7883,6 +7928,7 @@ func (m *Manager) checkPMGOffline(pmg models.PMGInstance) {
     defer m.mu.Unlock()
 
     // Check if PMG offline alerts are disabled via disableConnectivity flag
+    delete(m.offlineRecoveryConfirmations, alertID)
     if override, exists := m.config.Overrides[pmg.ID]; exists && (override.Disabled || override.DisableConnectivity) {
         // PMG connectivity alerts are disabled, clear any existing alert and return
         if _, alertExists := m.activeAlerts[alertID]; alertExists {
@@ -7970,6 +8016,18 @@ func (m *Manager) clearPMGOfflineAlert(pmg models.PMGInstance) {
     // Check if offline alert exists
     alert, exists := m.activeAlerts[alertID]
     if !exists {
+        delete(m.offlineRecoveryConfirmations, alertID)
         return
     }
 
+    const requiredRecoveryCount = 3
+    recoveryCount, confirmed := m.confirmOfflineRecoveryNoLock(alertID, requiredRecoveryCount)
+    if !confirmed {
+        log.Debug().
+            Str("pmg", pmg.Name).
+            Int("confirmations", recoveryCount).
+            Int("required", requiredRecoveryCount).
+            Msg("PMG appears back online, waiting for recovery confirmation")
+        return
+    }
+
@@ -9009,6 +9067,7 @@ func (m *Manager) checkStorageOffline(storage models.Storage) {
     defer m.mu.Unlock()
 
     // Check if storage offline alerts are disabled
+    delete(m.offlineRecoveryConfirmations, alertID)
     if override, exists, _ := findStorageOverride(m.config.Overrides, storage); exists && override.Disabled {
         // Storage alerts are disabled, clear any existing alert and return
         if _, alertExists := m.activeAlerts[alertID]; alertExists {
@@ -9097,6 +9156,18 @@ func (m *Manager) clearStorageOfflineAlert(storage models.Storage) {
     // Check if offline alert exists
    alert, exists := m.activeAlerts[alertID]
     if !exists {
+        delete(m.offlineRecoveryConfirmations, alertID)
         return
     }
 
+    const requiredRecoveryCount = 2
+    recoveryCount, confirmed := m.confirmOfflineRecoveryNoLock(alertID, requiredRecoveryCount)
+    if !confirmed {
+        log.Debug().
+            Str("storage", storage.Name).
+            Int("confirmations", recoveryCount).
+            Int("required", requiredRecoveryCount).
+            Msg("Storage appears back online, waiting for recovery confirmation")
+        return
+    }
+
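Taken together, the clear sites use per-resource thresholds: nodes, PBS, and PMG require three consecutive healthy polls (requiredRecoveryCount = 3) before an offline alert resolves, while storage requires two. The added resolution latency is roughly requiredRecoveryCount times the poll interval; the interval is configured elsewhere and does not appear in this diff, but with a hypothetical 30-second poll a node recovery notification would trail the actual recovery by about 90 seconds.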
@@ -10158,6 +10229,7 @@ func (m *Manager) ClearActiveAlerts() {
     m.alertRateLimit = make(map[string][]time.Time)
     m.nodeOfflineCount = make(map[string]int)
     m.offlineConfirmations = make(map[string]int)
+    m.offlineRecoveryConfirmations = make(map[string]int)
     m.dockerOfflineCount = make(map[string]int)
     m.dockerStateConfirm = make(map[string]int)
     m.dockerRestartTracking = make(map[string]*dockerRestartRecord)
@@ -10275,6 +10347,14 @@ func (m *Manager) cleanupStaleMaps() {
         }
     }
 
+    // Clean up recovery confirmation counts for alerts that are no longer active
+    for alertID := range m.offlineRecoveryConfirmations {
+        if _, exists := m.activeAlerts[alertID]; !exists {
+            delete(m.offlineRecoveryConfirmations, alertID)
+            cleaned++
+        }
+    }
+
     // Clean up node offline counts (legacy)
     for nodeID := range m.nodeOfflineCount {
         hasRelatedAlert := false
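Three paths keep offlineRecoveryConfirmations from accumulating stale keys: each checkXOffline entry point deletes the entry up front (an offline observation resets recovery progress), removeActiveAlertNoLock deletes it when the alert goes away, and this cleanupStaleMaps sweep drops any entry whose alert is no longer active.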
(Second changed file: the alert manager's test suite; the hunk headers below reference its Test functions.)
@@ -5183,6 +5183,20 @@ func TestClearStorageOfflineAlert(t *testing.T) {
         resolvedCh <- id
     })
 
+    m.clearStorageOfflineAlert(storage)
+    m.mu.RLock()
+    _, alertStillActive := m.activeAlerts[alertID]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected alert to remain active until recovery is confirmed")
+    }
+
+    select {
+    case <-resolvedCh:
+        t.Fatal("expected no resolved callback before recovery is confirmed")
+    default:
+    }
+
     m.clearStorageOfflineAlert(storage)
 
     m.mu.RLock()
@@ -8141,6 +8155,28 @@ func TestClearNodeOfflineAlert(t *testing.T) {
     m.mu.Unlock()
 
     node := models.Node{ID: "node1", Name: "Node 1", Instance: "pve1"}
+    m.clearNodeOfflineAlert(node)
+    m.mu.RLock()
+    _, alertStillActive := m.activeAlerts["node-offline-node1"]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected alert to remain active until recovery is confirmed")
+    }
+
+    select {
+    case <-resolvedCh:
+        t.Fatal("expected no resolved callback before recovery is confirmed")
+    default:
+    }
+
+    m.clearNodeOfflineAlert(node)
+    m.mu.RLock()
+    _, alertStillActive = m.activeAlerts["node-offline-node1"]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected alert to remain active until final recovery confirmation")
+    }
+
     m.clearNodeOfflineAlert(node)
 
     m.mu.RLock()
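The updated tests encode the thresholds directly: clearStorageOfflineAlert must be called twice before the storage alert resolves, while clearNodeOfflineAlert (and the PBS and PMG variants below) must be called three times, with assertions after each intermediate call that the alert is still active and that no resolved callback has fired.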
@@ -8194,6 +8230,7 @@ func TestClearOfflineAlertNoDeadlock(t *testing.T) {
                 Type:      "offline",
                 StartTime: time.Now().Add(-5 * time.Minute),
             }
+            m.offlineRecoveryConfirmations["node-offline-node1"] = 2
             m.mu.Unlock()
         },
         clearFn: func(m *Manager) {
@@ -8209,6 +8246,7 @@ func TestClearOfflineAlertNoDeadlock(t *testing.T) {
                 Type:      "offline",
                 StartTime: time.Now().Add(-5 * time.Minute),
             }
+            m.offlineRecoveryConfirmations["pbs-offline-pbs1"] = 2
             m.mu.Unlock()
         },
         clearFn: func(m *Manager) {
@@ -8224,6 +8262,7 @@ func TestClearOfflineAlertNoDeadlock(t *testing.T) {
                 Type:      "offline",
                 StartTime: time.Now().Add(-5 * time.Minute),
             }
+            m.offlineRecoveryConfirmations["pmg-offline-pmg1"] = 2
             m.mu.Unlock()
         },
         clearFn: func(m *Manager) {
@@ -8239,6 +8278,7 @@ func TestClearOfflineAlertNoDeadlock(t *testing.T) {
                 Type:      "offline",
                 StartTime: time.Now().Add(-5 * time.Minute),
             }
+            m.offlineRecoveryConfirmations["storage-offline-stor1"] = 1
             m.mu.Unlock()
         },
         clearFn: func(m *Manager) {
@@ -8342,6 +8382,28 @@ func TestClearPBSOfflineAlert(t *testing.T) {
     m.mu.Unlock()
 
     pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1", Host: "pbs.local"}
+    m.clearPBSOfflineAlert(pbs)
+    m.mu.RLock()
+    _, alertStillActive := m.activeAlerts["pbs-offline-pbs1"]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected alert to remain active until recovery is confirmed")
+    }
+
+    select {
+    case <-resolvedCh:
+        t.Fatal("expected no resolved callback before recovery is confirmed")
+    default:
+    }
+
+    m.clearPBSOfflineAlert(pbs)
+    m.mu.RLock()
+    _, alertStillActive = m.activeAlerts["pbs-offline-pbs1"]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected alert to remain active until final recovery confirmation")
+    }
+
     m.clearPBSOfflineAlert(pbs)
 
     m.mu.RLock()
@@ -8425,6 +8487,28 @@ func TestClearPMGOfflineAlert(t *testing.T) {
     m.mu.Unlock()
 
     pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1", Host: "pmg.local"}
+    m.clearPMGOfflineAlert(pmg)
+    m.mu.RLock()
+    _, alertStillActive := m.activeAlerts["pmg-offline-pmg1"]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected alert to remain active until recovery is confirmed")
+    }
+
+    select {
+    case <-resolvedCh:
+        t.Fatal("expected no resolved callback before recovery is confirmed")
+    default:
+    }
+
+    m.clearPMGOfflineAlert(pmg)
+    m.mu.RLock()
+    _, alertStillActive = m.activeAlerts["pmg-offline-pmg1"]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected alert to remain active until final recovery confirmation")
+    }
+
     m.clearPMGOfflineAlert(pmg)
 
     m.mu.RLock()
@@ -13370,6 +13454,22 @@ func TestCheckNode(t *testing.T) {
         Status:           "online",
         ConnectionHealth: "connected",
     }
+    m.CheckNode(node)
+    m.mu.RLock()
+    _, alertStillActive := m.activeAlerts["node-offline-node1"]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected offline alert to remain until recovery is confirmed")
+    }
+
+    m.CheckNode(node)
+    m.mu.RLock()
+    _, alertStillActive = m.activeAlerts["node-offline-node1"]
+    m.mu.RUnlock()
+    if !alertStillActive {
+        t.Fatal("expected offline alert to remain until final recovery confirmation")
+    }
+
     m.CheckNode(node)
 
     m.mu.RLock()
@@ -15429,6 +15529,22 @@ func TestCheckPBSComprehensive(t *testing.T) {
         ConnectionHealth: "healthy",
     }
 
+    m.CheckPBS(pbs)
+    m.mu.RLock()
+    _, offlineStillActive := m.activeAlerts["pbs-offline-pbs1"]
+    m.mu.RUnlock()
+    if !offlineStillActive {
+        t.Fatal("expected offline alert to remain until recovery is confirmed")
+    }
+
+    m.CheckPBS(pbs)
+    m.mu.RLock()
+    _, offlineStillActive = m.activeAlerts["pbs-offline-pbs1"]
+    m.mu.RUnlock()
+    if !offlineStillActive {
+        t.Fatal("expected offline alert to remain until final recovery confirmation")
+    }
+
     m.CheckPBS(pbs)
 
     m.mu.RLock()
@@ -15443,6 +15559,89 @@ func TestCheckPBSComprehensive(t *testing.T) {
             t.Error("expected offline confirmation to be cleared")
         }
     })
+
+    t.Run("transient healthy poll does not re-arm offline alert notifications", func(t *testing.T) {
+        m := newTestManager(t)
+
+        alertsCh := make(chan string, 2)
+        resolvedCh := make(chan string, 1)
+
+        m.mu.Lock()
+        m.config.ActivationState = ActivationActive
+        m.offlineConfirmations["pbs1"] = 2
+        m.mu.Unlock()
+
+        m.SetAlertCallback(func(alert *Alert) {
+            if alert != nil {
+                alertsCh <- alert.ID
+            }
+        })
+        m.SetResolvedCallback(func(alertID string) {
+            resolvedCh <- alertID
+        })
+
+        offlinePBS := models.PBSInstance{
+            ID:     "pbs1",
+            Name:   "testpbs",
+            Status: "offline",
+        }
+        onlinePBS := models.PBSInstance{
+            ID:               "pbs1",
+            Name:             "testpbs",
+            Status:           "online",
+            ConnectionHealth: "healthy",
+        }
+
+        m.CheckPBS(offlinePBS)
+        select {
+        case alertID := <-alertsCh:
+            if alertID != "pbs-offline-pbs1" {
+                t.Fatalf("expected initial PBS offline notification, got %q", alertID)
+            }
+        case <-time.After(2 * time.Second):
+            t.Fatal("expected initial PBS offline notification")
+        }
+
+        m.CheckPBS(onlinePBS)
+
+        m.mu.RLock()
+        _, stillActive := m.activeAlerts["pbs-offline-pbs1"]
+        recoveryCount := m.offlineRecoveryConfirmations["pbs-offline-pbs1"]
+        m.mu.RUnlock()
+
+        if !stillActive {
+            t.Fatal("expected transient healthy poll to keep the offline alert active")
+        }
+        if recoveryCount != 1 {
+            t.Fatalf("expected recovery confirmation count 1 after transient healthy poll, got %d", recoveryCount)
+        }
+
+        select {
+        case resolvedID := <-resolvedCh:
+            t.Fatalf("expected no recovery notification from a single healthy poll, got %q", resolvedID)
+        default:
+        }
+
+        m.CheckPBS(offlinePBS)
+
+        m.mu.RLock()
+        _, stillActive = m.activeAlerts["pbs-offline-pbs1"]
+        _, recoveryTracked := m.offlineRecoveryConfirmations["pbs-offline-pbs1"]
+        m.mu.RUnlock()
+
+        if !stillActive {
+            t.Fatal("expected offline alert to remain active after connectivity drops again")
+        }
+        if recoveryTracked {
+            t.Fatal("expected transient recovery tracking to reset once PBS is offline again")
+        }
+
+        select {
+        case alertID := <-alertsCh:
+            t.Fatalf("expected no duplicate offline notification while alert stays active, got %q", alertID)
+        case <-time.After(200 * time.Millisecond):
+        }
+    })
 }
 
 func TestCheckPMGComprehensive(t *testing.T) {
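This new subtest pins down the motivating bug: a single healthy poll in the middle of an outage must neither resolve the alert nor, once the resource drops again, re-fire the offline notification, and the partial recovery count must be discarded as soon as the resource is seen offline again.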
@@ -15694,6 +15893,22 @@ func TestCheckPMGComprehensive(t *testing.T) {
         ConnectionHealth: "healthy",
     }
 
+    m.CheckPMG(pmg)
+    m.mu.RLock()
+    _, offlineStillActive := m.activeAlerts["pmg-offline-pmg1"]
+    m.mu.RUnlock()
+    if !offlineStillActive {
+        t.Fatal("expected offline alert to remain until recovery is confirmed")
+    }
+
+    m.CheckPMG(pmg)
+    m.mu.RLock()
+    _, offlineStillActive = m.activeAlerts["pmg-offline-pmg1"]
+    m.mu.RUnlock()
+    if !offlineStillActive {
+        t.Fatal("expected offline alert to remain until final recovery confirmation")
+    }
+
     m.CheckPMG(pmg)
 
     m.mu.RLock()
@@ -16049,6 +16264,14 @@ func TestCheckStorageComprehensive(t *testing.T) {
         Status: "active",
     }
 
+    m.CheckStorage(storage)
+    m.mu.RLock()
+    _, offlineStillActive := m.activeAlerts["storage-offline-storage1"]
+    m.mu.RUnlock()
+    if !offlineStillActive {
+        t.Fatal("expected offline alert to remain until recovery is confirmed")
+    }
+
     m.CheckStorage(storage)
 
     m.mu.RLock()