mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 11:30:15 +00:00
290 lines
7.1 KiB
Go
290 lines
7.1 KiB
Go
package alerts
|
|
|
|
import (
|
|
"encoding/json"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// TestPerMetricDelayConfiguration demonstrates the per-metric delay feature
|
|
// This test shows how to configure different alert delays for different metrics
|
|
func TestPerMetricDelayConfiguration(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
config AlertConfig
|
|
resourceType string
|
|
metricType string
|
|
expectedDelay int
|
|
description string
|
|
}{
|
|
{
|
|
name: "CPU alert with longer delay than memory",
|
|
config: AlertConfig{
|
|
TimeThresholds: map[string]int{
|
|
"guest": 60, // 60 second default for guests
|
|
},
|
|
MetricTimeThresholds: map[string]map[string]int{
|
|
"guest": {
|
|
"cpu": 300, // 5 minutes for CPU (transient spikes)
|
|
"memory": 30, // 30 seconds for memory (persistent issues)
|
|
},
|
|
},
|
|
},
|
|
resourceType: "guest",
|
|
metricType: "cpu",
|
|
expectedDelay: 300,
|
|
description: "CPU alerts should wait 5 minutes due to transient spikes",
|
|
},
|
|
{
|
|
name: "Memory alert with shorter delay",
|
|
config: AlertConfig{
|
|
TimeThresholds: map[string]int{
|
|
"guest": 60,
|
|
},
|
|
MetricTimeThresholds: map[string]map[string]int{
|
|
"guest": {
|
|
"cpu": 300,
|
|
"memory": 30,
|
|
},
|
|
},
|
|
},
|
|
resourceType: "guest",
|
|
metricType: "memory",
|
|
expectedDelay: 30,
|
|
description: "Memory alerts should trigger quickly as they're persistent",
|
|
},
|
|
{
|
|
name: "Disk alert falls back to resource type default",
|
|
config: AlertConfig{
|
|
TimeThresholds: map[string]int{
|
|
"guest": 60,
|
|
},
|
|
MetricTimeThresholds: map[string]map[string]int{
|
|
"guest": {
|
|
"cpu": 300,
|
|
"memory": 30,
|
|
},
|
|
},
|
|
},
|
|
resourceType: "guest",
|
|
metricType: "disk",
|
|
expectedDelay: 60,
|
|
description: "Disk has no specific override, uses guest default (60s)",
|
|
},
|
|
{
|
|
name: "Global metric delay when no type-specific base exists",
|
|
config: AlertConfig{
|
|
TimeThresholds: map[string]int{
|
|
"guest": 60,
|
|
// Note: no "storage" entry, so storage uses global metric delays
|
|
},
|
|
MetricTimeThresholds: map[string]map[string]int{
|
|
"all": {
|
|
"usage": 120, // 2 minutes for storage usage alerts globally
|
|
},
|
|
},
|
|
},
|
|
resourceType: "storage",
|
|
metricType: "usage",
|
|
expectedDelay: 120,
|
|
description: "Storage usage uses global metric override (120s) when no type-specific base delay exists",
|
|
},
|
|
{
|
|
name: "Specific override beats global override",
|
|
config: AlertConfig{
|
|
TimeThresholds: map[string]int{
|
|
"guest": 60,
|
|
},
|
|
MetricTimeThresholds: map[string]map[string]int{
|
|
"all": {
|
|
"cpu": 180, // 3 minutes globally
|
|
},
|
|
"guest": {
|
|
"cpu": 300, // 5 minutes for guests specifically
|
|
},
|
|
},
|
|
},
|
|
resourceType: "guest",
|
|
metricType: "cpu",
|
|
expectedDelay: 300,
|
|
description: "Guest-specific CPU delay (300s) overrides global CPU delay (180s)",
|
|
},
|
|
{
|
|
name: "Docker container with restart count alert",
|
|
config: AlertConfig{
|
|
TimeThresholds: map[string]int{
|
|
"guest": 60,
|
|
},
|
|
MetricTimeThresholds: map[string]map[string]int{
|
|
"docker": {
|
|
"restartcount": 10, // Quick notification for container restarts
|
|
"cpu": 120, // Longer for CPU
|
|
},
|
|
},
|
|
},
|
|
resourceType: "docker",
|
|
metricType: "restartcount",
|
|
expectedDelay: 10,
|
|
description: "Restart count alerts trigger quickly (10s) for immediate attention",
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// Create a manager with the test config
|
|
manager := &Manager{
|
|
config: tt.config,
|
|
pendingAlerts: make(map[string]time.Time),
|
|
}
|
|
|
|
// Get the time threshold for this resource/metric combination
|
|
delay := manager.getTimeThreshold("test-resource-id", tt.resourceType, tt.metricType)
|
|
|
|
if delay != tt.expectedDelay {
|
|
t.Errorf("Expected delay %d seconds, got %d seconds\n%s",
|
|
tt.expectedDelay, delay, tt.description)
|
|
}
|
|
|
|
t.Logf("✓ %s: %d seconds", tt.description, delay)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestPerMetricDelayJSON demonstrates JSON serialization/deserialization
|
|
func TestPerMetricDelayJSON(t *testing.T) {
|
|
config := AlertConfig{
|
|
TimeThresholds: map[string]int{
|
|
"guest": 60,
|
|
"node": 30,
|
|
},
|
|
MetricTimeThresholds: map[string]map[string]int{
|
|
"guest": {
|
|
"cpu": 300,
|
|
"memory": 30,
|
|
"disk": 90,
|
|
},
|
|
"node": {
|
|
"cpu": 120,
|
|
"temperature": 180,
|
|
},
|
|
"all": {
|
|
"networkout": 60,
|
|
},
|
|
},
|
|
}
|
|
|
|
// Serialize to JSON
|
|
data, err := json.MarshalIndent(config, "", " ")
|
|
if err != nil {
|
|
t.Fatalf("Failed to marshal config: %v", err)
|
|
}
|
|
|
|
t.Logf("Configuration JSON:\n%s", string(data))
|
|
|
|
// Deserialize back
|
|
var decoded AlertConfig
|
|
if err := json.Unmarshal(data, &decoded); err != nil {
|
|
t.Fatalf("Failed to unmarshal config: %v", err)
|
|
}
|
|
|
|
// Verify the data round-trips correctly
|
|
if decoded.TimeThresholds["guest"] != 60 {
|
|
t.Errorf("TimeThresholds not preserved")
|
|
}
|
|
if decoded.MetricTimeThresholds["guest"]["cpu"] != 300 {
|
|
t.Errorf("MetricTimeThresholds not preserved")
|
|
}
|
|
|
|
t.Log("✓ JSON serialization/deserialization works correctly")
|
|
}
|
|
|
|
// TestPerMetricDelayUseCases documents common use cases
|
|
func TestPerMetricDelayUseCases(t *testing.T) {
|
|
t.Log("=== Per-Metric Delay Configuration Use Cases ===\n")
|
|
|
|
useCases := []struct {
|
|
scenario string
|
|
config string
|
|
reason string
|
|
}{
|
|
{
|
|
scenario: "Production VMs: Different delays for different metrics",
|
|
config: `{
|
|
"timeThresholds": {
|
|
"guest": 60
|
|
},
|
|
"metricTimeThresholds": {
|
|
"guest": {
|
|
"cpu": 300,
|
|
"memory": 60,
|
|
"disk": 120,
|
|
"diskread": 180,
|
|
"diskwrite": 180
|
|
}
|
|
}
|
|
}`,
|
|
reason: `
|
|
- CPU: 5 minutes (transient spikes are normal)
|
|
- Memory: 1 minute (persistent issues need attention)
|
|
- Disk: 2 minutes (filling up gradually)
|
|
- I/O: 3 minutes (workload-dependent, can spike)`,
|
|
},
|
|
{
|
|
scenario: "Proxmox Nodes: Temperature needs patience",
|
|
config: `{
|
|
"timeThresholds": {
|
|
"node": 30
|
|
},
|
|
"metricTimeThresholds": {
|
|
"node": {
|
|
"cpu": 120,
|
|
"memory": 60,
|
|
"temperature": 300
|
|
}
|
|
}
|
|
}`,
|
|
reason: `
|
|
- CPU: 2 minutes (load balancing can spike briefly)
|
|
- Memory: 1 minute (host memory issues are serious)
|
|
- Temperature: 5 minutes (fans need time to respond)`,
|
|
},
|
|
{
|
|
scenario: "Docker Containers: Fast alerts for restart issues",
|
|
config: `{
|
|
"timeThresholds": {
|
|
"guest": 60
|
|
},
|
|
"metricTimeThresholds": {
|
|
"docker": {
|
|
"restartcount": 10,
|
|
"cpu": 120,
|
|
"memory": 60
|
|
}
|
|
}
|
|
}`,
|
|
reason: `
|
|
- Restart count: 10 seconds (immediate notification needed)
|
|
- CPU: 2 minutes (containers can spike during startup)
|
|
- Memory: 1 minute (OOM is a critical issue)`,
|
|
},
|
|
{
|
|
scenario: "Global overrides for specific metrics across all types",
|
|
config: `{
|
|
"metricTimeThresholds": {
|
|
"all": {
|
|
"networkout": 300
|
|
}
|
|
}
|
|
}`,
|
|
reason: `
|
|
- Network Out: 5 minutes globally (backups/migrations cause spikes)
|
|
- Applies to VMs, containers, nodes, hosts unless overridden`,
|
|
},
|
|
}
|
|
|
|
for _, uc := range useCases {
|
|
t.Logf("\n📋 Scenario: %s\n", uc.scenario)
|
|
t.Logf("Configuration:\n%s\n", uc.config)
|
|
t.Logf("Reasoning:%s\n", uc.reason)
|
|
}
|
|
}
|