Pulse/internal/alerts/threshold_update_test.go
2025-10-11 23:29:47 +00:00

218 lines
6.3 KiB
Go

package alerts
import (
"testing"
"time"
)
// TestReevaluateActiveAlertsOnThresholdChange checks that an alert already in
// the manager's active set is removed, and recorded as recently resolved, once
// the default guest CPU threshold is raised above the alert's recorded value.
func TestReevaluateActiveAlertsOnThresholdChange(t *testing.T) {
	const alertID = "test-vm-101-cpu"

	mgr := NewManager()

	// Start from an empty active set, discarding anything loaded from disk.
	mgr.mu.Lock()
	mgr.activeAlerts = make(map[string]*Alert)
	mgr.mu.Unlock()

	// Baseline configuration: CPU triggers at 80% for guests and nodes.
	baseline := AlertConfig{
		Enabled: true,
		GuestDefaults: ThresholdConfig{
			CPU:    &HysteresisThreshold{Trigger: 80, Clear: 75},
			Memory: &HysteresisThreshold{Trigger: 85, Clear: 80},
		},
		NodeDefaults: ThresholdConfig{
			CPU:    &HysteresisThreshold{Trigger: 80, Clear: 75},
			Memory: &HysteresisThreshold{Trigger: 85, Clear: 80},
		},
		StorageDefault: HysteresisThreshold{Trigger: 85, Clear: 80},
		Overrides:      make(map[string]ThresholdConfig),
	}
	mgr.UpdateConfig(baseline)

	// Seed the active set by hand with a VM CPU alert recorded at 91%.
	seed := &Alert{
		ID:           alertID,
		Type:         "cpu",
		Level:        AlertLevelWarning,
		ResourceID:   "test-vm-101",
		ResourceName: "test-vm",
		Node:         "pve1",
		Instance:     "qemu/101",
		Message:      "CPU usage is 91%",
		Value:        91.0,
		Threshold:    80.0,
		StartTime:    time.Now().Add(-5 * time.Minute),
		LastSeen:     time.Now(),
	}
	mgr.mu.Lock()
	mgr.activeAlerts[alertID] = seed
	seeded := len(mgr.activeAlerts)
	mgr.mu.Unlock()

	// Sanity check: the seeded alert is the only active one.
	if seeded != 1 {
		t.Fatalf("Expected 1 active alert, got %d", seeded)
	}

	// Raise only the guest CPU threshold to 95%, above the alert's 91%.
	raised := baseline
	raised.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 95, Clear: 90}
	mgr.UpdateConfig(raised)

	// Allow time for any goroutines kicked off by the update to finish.
	time.Sleep(100 * time.Millisecond)

	// Read the active set once, under the read lock.
	mgr.mu.RLock()
	remaining := len(mgr.activeAlerts)
	_, active := mgr.activeAlerts[alertID]
	mgr.mu.RUnlock()

	if active {
		t.Errorf("Expected alert to be resolved after threshold increase, but it's still active")
	}
	if remaining != 0 {
		t.Errorf("Expected 0 active alerts after threshold update, got %d", remaining)
	}

	// The alert must also show up in the recently-resolved map.
	mgr.resolvedMutex.RLock()
	_, resolved := mgr.recentlyResolved[alertID]
	mgr.resolvedMutex.RUnlock()

	if !resolved {
		t.Errorf("Expected alert to be in recently resolved list")
	}
}
// TestReevaluateActiveAlertsWithOverride verifies that adding a
// resource-specific override whose CPU trigger lies above an active alert's
// recorded value causes that alert to leave the manager's active set.
func TestReevaluateActiveAlertsWithOverride(t *testing.T) {
	manager := NewManager()

	// Clear any alerts loaded from disk
	manager.mu.Lock()
	manager.activeAlerts = make(map[string]*Alert)
	manager.mu.Unlock()

	// Set initial configuration: CPU triggers at 80%, no overrides.
	initialConfig := AlertConfig{
		Enabled: true,
		GuestDefaults: ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
		},
		NodeDefaults: ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
		},
		StorageDefault: HysteresisThreshold{Trigger: 85, Clear: 80},
		Overrides:      make(map[string]ThresholdConfig),
	}
	manager.UpdateConfig(initialConfig)

	// Create an active alert for a specific VM with 91% CPU
	alertID := "test-vm-202-cpu"
	resourceID := "test-vm-202"
	alert := &Alert{
		ID:           alertID,
		Type:         "cpu",
		Level:        AlertLevelWarning,
		ResourceID:   resourceID,
		ResourceName: "test-vm",
		Node:         "pve1",
		// NOTE(review): Instance says "qemu/101" although the resource is VM 202;
		// looks like a copy-paste leftover — confirm whether it matters.
		Instance:  "qemu/101",
		Message:   "CPU usage is 91%",
		Value:     91.0,
		Threshold: 80.0,
		StartTime: time.Now().Add(-5 * time.Minute),
		LastSeen:  time.Now(),
	}
	manager.mu.Lock()
	manager.activeAlerts[alertID] = alert
	manager.mu.Unlock()

	// Build the updated configuration with its OWN Overrides map. Copying an
	// AlertConfig copies the map reference, not its entries, so writing through
	// updatedConfig.Overrides directly would also mutate initialConfig.Overrides
	// — the map already handed to the manager above — without holding any lock
	// and before UpdateConfig is called.
	updatedConfig := initialConfig
	updatedConfig.Overrides = make(map[string]ThresholdConfig, len(initialConfig.Overrides)+1)
	for id, override := range initialConfig.Overrides {
		updatedConfig.Overrides[id] = override
	}
	// Override for this specific VM: CPU trigger raised to 95%.
	updatedConfig.Overrides[resourceID] = ThresholdConfig{
		CPU: &HysteresisThreshold{Trigger: 95, Clear: 90},
	}
	manager.UpdateConfig(updatedConfig)

	// Wait a moment for any goroutines to complete
	time.Sleep(100 * time.Millisecond)

	// Check if the alert was resolved
	manager.mu.RLock()
	_, alertStillActive := manager.activeAlerts[alertID]
	manager.mu.RUnlock()

	if alertStillActive {
		t.Errorf("Expected alert to be resolved after override threshold increase, but it's still active")
	}
}
// TestReevaluateActiveAlertsStillAboveThreshold checks the negative case: when
// the guest CPU threshold is raised but the alert's recorded value still
// exceeds it, the alert must stay in the manager's active set.
func TestReevaluateActiveAlertsStillAboveThreshold(t *testing.T) {
	const alertID = "test-vm-303-cpu"

	mgr := NewManager()

	// Start from an empty active set, discarding anything loaded from disk.
	mgr.mu.Lock()
	mgr.activeAlerts = make(map[string]*Alert)
	mgr.mu.Unlock()

	// Baseline configuration: CPU triggers at 80% for guests and nodes.
	baseline := AlertConfig{
		Enabled: true,
		GuestDefaults: ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
		},
		NodeDefaults: ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
		},
		StorageDefault: HysteresisThreshold{Trigger: 85, Clear: 80},
		Overrides:      make(map[string]ThresholdConfig),
	}
	mgr.UpdateConfig(baseline)

	// Seed the active set by hand with a VM CPU alert recorded at 96%.
	seed := &Alert{
		ID:           alertID,
		Type:         "cpu",
		Level:        AlertLevelWarning,
		ResourceID:   "test-vm-303",
		ResourceName: "test-vm",
		Node:         "pve1",
		Instance:     "qemu/101",
		Message:      "CPU usage is 96%",
		Value:        96.0,
		Threshold:    80.0,
		StartTime:    time.Now().Add(-5 * time.Minute),
		LastSeen:     time.Now(),
	}
	mgr.mu.Lock()
	mgr.activeAlerts[alertID] = seed
	mgr.mu.Unlock()

	// Raise the guest CPU trigger to 90%, which is still below the alert's 96%.
	raised := baseline
	raised.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 90, Clear: 85}
	mgr.UpdateConfig(raised)

	// Allow time for any goroutines kicked off by the update to finish.
	time.Sleep(100 * time.Millisecond)

	// 96% is above the new 90% trigger, so the alert is expected to survive.
	mgr.mu.RLock()
	_, active := mgr.activeAlerts[alertID]
	mgr.mu.RUnlock()

	if !active {
		t.Errorf("Expected alert to remain active since value (96%%) is still above new threshold (90%%)")
	}
}