// Pulse/internal/alerts/unified_eval_test.go
package alerts
import (
"sort"
"testing"
"time"
alertspecs "github.com/rcourtman/pulse-go-rewrite/internal/alerts/specs"
"github.com/rcourtman/pulse-go-rewrite/internal/models"
)
// unifiedEvalBaseConfig returns an enabled AlertConfig whose guest, node,
// agent, and PBS families all share the same CPU/memory/disk hysteresis
// defaults, giving the unified-eval tests a predictable baseline.
func unifiedEvalBaseConfig() AlertConfig {
	// Allocate a fresh ThresholdConfig per family so no test can mutate
	// another family's settings through a shared pointer.
	newDefaults := func() ThresholdConfig {
		return ThresholdConfig{
			CPU:    &HysteresisThreshold{Trigger: 80, Clear: 75},
			Memory: &HysteresisThreshold{Trigger: 85, Clear: 80},
			Disk:   &HysteresisThreshold{Trigger: 90, Clear: 85},
		}
	}
	pbsDefaults := newDefaults()
	pbsDefaults.Disk = nil // PBS defaults cover CPU and memory only.
	return AlertConfig{
		Enabled:         true,
		ActivationState: ActivationActive,
		GuestDefaults:   newDefaults(),
		NodeDefaults:    newDefaults(),
		AgentDefaults:   newDefaults(),
		PBSDefaults:     pbsDefaults,
		StorageDefault:  HysteresisThreshold{Trigger: 85, Clear: 80},
		Overrides:       map[string]ThresholdConfig{},
		// Keep these explicit to make test intent obvious; final values are forced in configureUnifiedEvalManager.
		TimeThresholds:    map[string]int{},
		SuppressionWindow: 0,
		MinimumDelta:      0,
	}
}
// configureUnifiedEvalManager applies cfg to the manager, then strips the
// timing/suppression knobs that UpdateConfig normalizes back to defaults so
// tests can observe alerts on the very first evaluation. It also clears any
// alerts left over from earlier checks.
func configureUnifiedEvalManager(t *testing.T, m *Manager, cfg AlertConfig) {
	t.Helper()
	m.UpdateConfig(cfg)
	// UpdateConfig normalizes zero values back to defaults; force immediate alerting in tests.
	func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		m.config.TimeThresholds = map[string]int{}
		m.config.MetricTimeThresholds = nil
		m.config.SuppressionWindow = 0
		m.config.MinimumDelta = 0
	}()
	m.ClearActiveAlerts()
}
// alertKeys returns the sorted effective IDs of all currently active alerts,
// used to produce readable failure messages.
func alertKeys(m *Manager) []string {
	m.mu.RLock()
	defer m.mu.RUnlock()
	ids := make([]string, 0, len(m.activeAlerts))
	for key, active := range m.activeAlerts {
		ids = append(ids, effectiveAlertID(active, key))
	}
	sort.Strings(ids)
	return ids
}
// assertAlertPresent fails the test unless an active alert with alertID exists.
func assertAlertPresent(t *testing.T, m *Manager, alertID string) {
	t.Helper()
	m.mu.RLock()
	_, found := testLookupActiveAlert(t, m, alertID)
	m.mu.RUnlock()
	if found {
		return
	}
	t.Fatalf("expected alert %q to exist, active alerts: %v", alertID, alertKeys(m))
}
// assertAlertMissing fails the test if an active alert with alertID exists.
func assertAlertMissing(t *testing.T, m *Manager, alertID string) {
	t.Helper()
	m.mu.RLock()
	_, found := testLookupActiveAlert(t, m, alertID)
	m.mu.RUnlock()
	if !found {
		return
	}
	t.Fatalf("expected alert %q to be absent, active alerts: %v", alertID, alertKeys(m))
}
// TestCheckUnifiedResourceMajorFamilies verifies that each supported resource
// family (VM, system container, node, agent, storage, PBS) raises a metric
// alert when the relevant percentage exceeds its default trigger.
func TestCheckUnifiedResourceMajorFamilies(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	cases := []struct {
		name    string
		alertID string
		input   *UnifiedResourceInput
	}{
		{
			name:    "VM CPU above threshold creates alert",
			alertID: canonicalMetricStateID("vm-101", "cpu"),
			input: &UnifiedResourceInput{
				ID:   "vm-101",
				Type: "vm",
				Name: "vm-101",
				CPU:  &UnifiedResourceMetric{Percent: 90},
			},
		},
		{
			name:    "System container CPU above threshold creates alert",
			alertID: canonicalMetricStateID("lxc-200", "cpu"),
			input: &UnifiedResourceInput{
				ID:   "lxc-200",
				Type: "system-container",
				Name: "worker-ct",
				CPU:  &UnifiedResourceMetric{Percent: 91},
			},
		},
		{
			name:    "Node memory above threshold creates alert",
			alertID: canonicalMetricStateID("node-a", "memory"),
			input: &UnifiedResourceInput{
				ID:     "node-a",
				Type:   "node",
				Name:   "node-a",
				Memory: &UnifiedResourceMetric{Percent: 90},
			},
		},
		{
			name:    "Agent disk above threshold creates alert",
			alertID: canonicalMetricStateID("host-1", "disk"),
			input: &UnifiedResourceInput{
				ID:   "host-1",
				Type: "agent",
				Name: "host-1",
				Disk: &UnifiedResourceMetric{Percent: 95},
			},
		},
		{
			name:    "Storage usage above threshold creates alert",
			alertID: canonicalMetricStateID("storage-1", "usage"),
			input: &UnifiedResourceInput{
				ID:   "storage-1",
				Type: "storage",
				Name: "storage-1",
				Disk: &UnifiedResourceMetric{Percent: 92},
			},
		},
		{
			name:    "PBS CPU above threshold creates alert",
			alertID: canonicalMetricStateID("pbs-1", "cpu"),
			input: &UnifiedResourceInput{
				ID:   "pbs-1",
				Type: "pbs",
				Name: "pbs-1",
				CPU:  &UnifiedResourceMetric{Percent: 88},
			},
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Reset shared manager state so each family is checked in isolation.
			m.ClearActiveAlerts()
			m.CheckUnifiedResource(tc.input)
			assertAlertPresent(t, m, tc.alertID)
		})
	}
}
// TestCheckUnifiedResourceRejectsLegacyGuestTypeAlias checks that the legacy
// "lxc" type string does not produce an alert even with CPU well above the
// trigger (containers must use "system-container" — see the families test).
func TestCheckUnifiedResourceRejectsLegacyGuestTypeAlias(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	legacy := &UnifiedResourceInput{
		ID:   "legacy-ct-200",
		Type: "lxc",
		Name: "legacy-ct",
		CPU:  &UnifiedResourceMetric{Percent: 95},
	}
	m.CheckUnifiedResource(legacy)
	assertAlertMissing(t, m, canonicalMetricStateID("legacy-ct-200", "cpu"))
}
// TestCheckUnifiedResourceOverrideLowerThresholdCreatesAlert confirms a
// per-resource override (trigger 60) fires below the guest default (80).
func TestCheckUnifiedResourceOverrideLowerThresholdCreatesAlert(t *testing.T) {
	m := newTestManager(t)
	cfg := unifiedEvalBaseConfig()
	// 65% CPU is under the 80% default but over the 60% override.
	cfg.Overrides["vm-override"] = ThresholdConfig{
		CPU: &HysteresisThreshold{Trigger: 60, Clear: 55},
	}
	configureUnifiedEvalManager(t, m, cfg)
	m.CheckUnifiedResource(&UnifiedResourceInput{
		ID:   "vm-override",
		Type: "vm",
		Name: "vm-override",
		CPU:  &UnifiedResourceMetric{Percent: 65},
	})
	assertAlertPresent(t, m, canonicalMetricStateID("vm-override", "cpu"))
}
// TestCheckUnifiedResourceNilInputNoPanic ensures a nil input is tolerated
// rather than panicking.
func TestCheckUnifiedResourceNilInputNoPanic(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	defer func() {
		if rec := recover(); rec != nil {
			t.Fatalf("CheckUnifiedResource(nil) panicked: %v", rec)
		}
	}()
	m.CheckUnifiedResource(nil)
}
// TestCheckUnifiedResourceDisabledThresholdsNoAlert verifies that disabling
// the guest defaults suppresses alerts even far above the trigger.
func TestCheckUnifiedResourceDisabledThresholdsNoAlert(t *testing.T) {
	m := newTestManager(t)
	cfg := unifiedEvalBaseConfig()
	cfg.GuestDefaults.Disabled = true
	configureUnifiedEvalManager(t, m, cfg)
	input := &UnifiedResourceInput{
		ID:   "vm-disabled",
		Type: "vm",
		Name: "vm-disabled",
		CPU:  &UnifiedResourceMetric{Percent: 95},
	}
	m.CheckUnifiedResource(input)
	assertAlertMissing(t, m, canonicalMetricStateID("vm-disabled", "cpu"))
}
// TestCheckUnifiedResourceAnnotatesMetricAlertsWithCanonicalSpecMetadata
// verifies that a metric alert carries the canonical spec/kind/state
// annotations both in its Metadata map and in its dedicated struct fields.
func TestCheckUnifiedResourceAnnotatesMetricAlertsWithCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	m.CheckUnifiedResource(&UnifiedResourceInput{
		ID:   "vm-annotated",
		Type: "vm",
		Name: "vm-annotated",
		CPU:  &UnifiedResourceMetric{Percent: 90},
	})
	alertID := canonicalMetricStateID("vm-annotated", "cpu")
	m.mu.RLock()
	alert := testRequireActiveAlert(t, m, alertID)
	m.mu.RUnlock()
	if alert == nil {
		t.Fatalf("expected %s alert", alertID)
	}
	wantKind := string(alertspecs.AlertSpecKindMetricThreshold)
	wantSpec := canonicalMetricSpecID("vm-annotated", "cpu")
	if got := alert.Metadata["canonicalAlertKind"]; got != wantKind {
		t.Fatalf("canonicalAlertKind = %v, want %s", got, alertspecs.AlertSpecKindMetricThreshold)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != wantSpec {
		t.Fatalf("canonicalSpecID = %v, want %s", got, canonicalMetricSpecID("vm-annotated", "cpu"))
	}
	if alert.CanonicalSpecID != wantSpec {
		t.Fatalf("CanonicalSpecID = %q, want %s", alert.CanonicalSpecID, canonicalMetricSpecID("vm-annotated", "cpu"))
	}
	if alert.CanonicalKind != wantKind {
		t.Fatalf("CanonicalKind = %q, want %s", alert.CanonicalKind, alertspecs.AlertSpecKindMetricThreshold)
	}
	if alert.CanonicalState != alertID {
		t.Fatalf("CanonicalState = %q, want %s", alert.CanonicalState, canonicalMetricStateID("vm-annotated", "cpu"))
	}
}
// TestCheckUnifiedResourceKeepsInstanceScopedNodeDisplayNames verifies that
// node display names are tracked per (instance, node) pair: two clusters both
// have a node named "pve", and renaming cluster-a's node must update alerts
// for cluster-a's VM without touching cluster-b's.
func TestCheckUnifiedResourceKeepsInstanceScopedNodeDisplayNames(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	// Same node name "pve" registered under two different instances.
	m.UpdateNodeDisplayName("cluster-a", "pve", "Alpha")
	m.UpdateNodeDisplayName("cluster-b", "pve", "Beta")
	m.CheckUnifiedResource(&UnifiedResourceInput{
		ID:       "vm-a",
		Type:     "vm",
		Name:     "vm-a",
		Node:     "pve",
		Instance: "cluster-a",
		CPU:      &UnifiedResourceMetric{Percent: 90},
	})
	m.CheckUnifiedResource(&UnifiedResourceInput{
		ID:       "vm-b",
		Type:     "vm",
		Name:     "vm-b",
		Node:     "pve",
		Instance: "cluster-b",
		CPU:      &UnifiedResourceMetric{Percent: 91},
	})
	// Rename only cluster-a's node AFTER both alerts exist, then re-check
	// vm-a so its active alert picks up the new display name.
	m.UpdateNodeDisplayName("cluster-a", "pve", "Alpha Updated")
	m.CheckUnifiedResource(&UnifiedResourceInput{
		ID:       "vm-a",
		Type:     "vm",
		Name:     "vm-a",
		Node:     "pve",
		Instance: "cluster-a",
		CPU:      &UnifiedResourceMetric{Percent: 92},
	})
	m.mu.RLock()
	alertA := testRequireActiveAlert(t, m, canonicalMetricStateID("vm-a", "cpu"))
	alertB := testRequireActiveAlert(t, m, canonicalMetricStateID("vm-b", "cpu"))
	m.mu.RUnlock()
	// vm-a must see the rename; vm-b (other instance) must keep "Beta".
	if alertA.NodeDisplayName != "Alpha Updated" {
		t.Fatalf("vm-a node display name = %q, want %q", alertA.NodeDisplayName, "Alpha Updated")
	}
	if alertB.NodeDisplayName != "Beta" {
		t.Fatalf("vm-b node display name = %q, want %q", alertB.NodeDisplayName, "Beta")
	}
	// The public snapshot API must report the same instance-scoped names.
	gotByResourceID := make(map[string]Alert)
	for _, alert := range m.GetActiveAlerts() {
		gotByResourceID[alert.ResourceID] = alert
	}
	if gotByResourceID["vm-a"].NodeDisplayName != "Alpha Updated" {
		t.Fatalf("GetActiveAlerts vm-a node display name = %q, want %q", gotByResourceID["vm-a"].NodeDisplayName, "Alpha Updated")
	}
	if gotByResourceID["vm-b"].NodeDisplayName != "Beta" {
		t.Fatalf("GetActiveAlerts vm-b node display name = %q, want %q", gotByResourceID["vm-b"].NodeDisplayName, "Beta")
	}
}
// TestCheckGuestPerDiskAnnotatesCanonicalSpecMetadata verifies that a guest
// whose aggregate metrics are healthy but whose individual disk is at 95%
// raises a per-disk alert annotated with canonical spec metadata.
func TestCheckGuestPerDiskAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	guestID := BuildGuestKey("pve1", "node1", 101)
	// Only the per-disk usage (95%) exceeds a threshold; CPU/memory/overall
	// disk stay well below their triggers.
	fullDisk := models.Disk{
		Mountpoint: "/",
		Device:     "scsi0",
		Usage:      95,
		Total:      100,
		Used:       95,
		Free:       5,
	}
	m.CheckGuest(models.VM{
		ID:       guestID,
		VMID:     101,
		Name:     "app01",
		Node:     "node1",
		Instance: "pve1",
		Status:   "running",
		CPU:      0.20,
		Memory:   models.Memory{Usage: 40},
		Disk:     models.Disk{Usage: 40},
		Disks:    []models.Disk{fullDisk},
	}, "pve1")
	resourceID := guestID + "-disk-scsi0"
	alertID := canonicalMetricStateID(resourceID, "disk")
	m.mu.RLock()
	alert := testRequireActiveAlert(t, m, alertID)
	m.mu.RUnlock()
	if alert == nil {
		t.Fatalf("expected guest disk alert %q", alertID)
	}
	if got := alert.Metadata["canonicalAlertKind"]; got != string(alertspecs.AlertSpecKindMetricThreshold) {
		t.Fatalf("canonicalAlertKind = %v, want %s", got, alertspecs.AlertSpecKindMetricThreshold)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != canonicalMetricSpecID(resourceID, "disk") {
		t.Fatalf("canonicalSpecID = %v, want %s", got, canonicalMetricSpecID(resourceID, "disk"))
	}
}
// TestCheckNodeTemperatureAnnotatesCanonicalSpecMetadata verifies that a hot
// CPU package (90) on an otherwise healthy node raises a temperature alert
// carrying the canonical metric-threshold metadata.
func TestCheckNodeTemperatureAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	const nodeID = "node/pve-1"
	m.CheckNode(models.Node{
		ID:       nodeID,
		Name:     "pve-1",
		Instance: "pve-1",
		Status:   "online",
		CPU:      0.20,
		Memory:   models.Memory{Usage: 40},
		Disk:     models.Disk{Usage: 40},
		Temperature: &models.Temperature{
			Available:  true,
			CPUPackage: 90,
		},
	})
	alertID := canonicalMetricStateID(nodeID, "temperature")
	m.mu.RLock()
	alert := testRequireActiveAlert(t, m, alertID)
	m.mu.RUnlock()
	if alert == nil {
		t.Fatal("expected node temperature alert")
	}
	if got := alert.Metadata["canonicalAlertKind"]; got != string(alertspecs.AlertSpecKindMetricThreshold) {
		t.Fatalf("canonicalAlertKind = %v, want %s", got, alertspecs.AlertSpecKindMetricThreshold)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != canonicalMetricSpecID(nodeID, "temperature") {
		t.Fatalf("canonicalSpecID = %v, want %s", got, canonicalMetricSpecID(nodeID, "temperature"))
	}
}
// TestCheckGuestPoweredOffAnnotatesCanonicalSpecMetadata verifies that a
// stopped guest raises a powered-off alert with powered-state metadata.
func TestCheckGuestPoweredOffAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	resourceID := BuildGuestKey("pve1", "node1", 101)
	guest := models.VM{
		ID:       resourceID,
		VMID:     101,
		Name:     "app01",
		Node:     "node1",
		Instance: "pve1",
		Status:   "stopped",
	}
	// Check twice — the repeated observation mirrors the confirmation step
	// the other offline-style tests rely on.
	for i := 0; i < 2; i++ {
		m.CheckGuest(guest, "pve1")
	}
	wantSpec := resourceID + "-powered-state"
	alert := activeAlert(t, m, "guest-powered-off-"+resourceID)
	if got := alert.Metadata["canonicalAlertKind"]; got != "powered-state" {
		t.Fatalf("canonicalAlertKind = %v, want powered-state", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != wantSpec {
		t.Fatalf("canonicalSpecID = %v, want %s", got, wantSpec)
	}
}
// TestCheckNodeOfflineAnnotatesCanonicalSpecMetadata verifies that an offline
// node raises a connectivity alert with canonical connectivity metadata.
func TestCheckNodeOfflineAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	const nodeID = "node/pve-1"
	// Pre-seed the offline counter so one CheckNode call is enough to
	// confirm the outage and raise the alert.
	m.mu.Lock()
	m.nodeOfflineCount[nodeID] = 2
	m.mu.Unlock()
	m.CheckNode(models.Node{
		ID:               nodeID,
		Name:             "pve-1",
		Instance:         "pve1",
		Status:           "offline",
		ConnectionHealth: "failed",
	})
	specID := nodeID + "-connectivity"
	alert := activeAlert(t, m, buildCanonicalStateID(nodeID, specID))
	if got := alert.Metadata["canonicalAlertKind"]; got != "connectivity" {
		t.Fatalf("canonicalAlertKind = %v, want connectivity", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != specID {
		t.Fatalf("canonicalSpecID = %v, want %s", got, specID)
	}
}
// TestCheckPBSOfflineAnnotatesCanonicalSpecMetadata verifies that a PBS
// instance with unhealthy connectivity raises an offline alert carrying
// canonical connectivity metadata.
func TestCheckPBSOfflineAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	// Pre-seed confirmations so a single check crosses the offline threshold.
	m.mu.Lock()
	m.offlineConfirmations["pbs-1"] = 2
	m.mu.Unlock()
	m.CheckPBS(models.PBSInstance{
		ID:               "pbs-1",
		Name:             "pbs-main",
		Host:             "pbs-host",
		Status:           "online",
		ConnectionHealth: "unhealthy",
	})
	alert := activeAlert(t, m, "pbs-offline-pbs-1")
	if got := alert.Metadata["canonicalAlertKind"]; got != "connectivity" {
		t.Fatalf("canonicalAlertKind = %v, want connectivity", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "pbs-1-connectivity" {
		t.Fatalf("canonicalSpecID = %v, want %s", got, "pbs-1-connectivity")
	}
}
// TestCheckStorageOfflineAnnotatesCanonicalSpecMetadata verifies that an
// unavailable storage raises an offline alert with connectivity metadata.
func TestCheckStorageOfflineAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	// Pre-seed confirmations so a single check crosses the offline threshold.
	m.mu.Lock()
	m.offlineConfirmations["storage-1"] = 1
	m.mu.Unlock()
	m.CheckStorage(models.Storage{
		ID:       "storage-1",
		Name:     "local-lvm",
		Node:     "pve-1",
		Instance: "pve1",
		Status:   "unavailable",
	})
	alert := activeAlert(t, m, "storage-offline-storage-1")
	if got := alert.Metadata["canonicalAlertKind"]; got != "connectivity" {
		t.Fatalf("canonicalAlertKind = %v, want connectivity", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "storage-1-connectivity" {
		t.Fatalf("canonicalSpecID = %v, want %s", got, "storage-1-connectivity")
	}
}
// TestCheckPMGOfflineAnnotatesCanonicalSpecMetadata verifies that a PMG
// instance with unhealthy connectivity raises an offline alert carrying
// canonical connectivity metadata.
func TestCheckPMGOfflineAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	// Pre-seed confirmations so a single check crosses the offline threshold.
	m.mu.Lock()
	m.offlineConfirmations["pmg-1"] = 2
	m.mu.Unlock()
	m.CheckPMG(models.PMGInstance{
		ID:               "pmg-1",
		Name:             "pmg-main",
		Host:             "pmg-host",
		Status:           "online",
		ConnectionHealth: "unhealthy",
	})
	alert := activeAlert(t, m, "pmg-offline-pmg-1")
	if got := alert.Metadata["canonicalAlertKind"]; got != "connectivity" {
		t.Fatalf("canonicalAlertKind = %v, want connectivity", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "pmg-1-connectivity" {
		t.Fatalf("canonicalSpecID = %v, want %s", got, "pmg-1-connectivity")
	}
}
// TestHandleDockerHostOfflineAnnotatesCanonicalSpecMetadata verifies that an
// offline Docker host raises an alert with canonical connectivity metadata.
func TestHandleDockerHostOfflineAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	// Pre-seed the offline counter so one call confirms the outage.
	m.mu.Lock()
	m.dockerOfflineCount["docker1"] = 2
	m.mu.Unlock()
	m.HandleDockerHostOffline(models.DockerHost{
		ID:          "docker1",
		DisplayName: "Docker Host 1",
		Hostname:    "docker.local",
		AgentID:     "agent-123",
	})
	alert := activeAlert(t, m, "docker-host-offline-docker1")
	if got := alert.Metadata["canonicalAlertKind"]; got != "connectivity" {
		t.Fatalf("canonicalAlertKind = %v, want connectivity", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "docker:docker1-connectivity" {
		t.Fatalf("canonicalSpecID = %v, want %s", got, "docker:docker1-connectivity")
	}
}
// TestCheckDockerContainerStateAnnotatesCanonicalSpecMetadata verifies that
// an exited container raises a state alert with discrete-state metadata.
func TestCheckDockerContainerStateAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	exited := models.DockerContainer{
		ID:     "container-1",
		Name:   "web",
		State:  "exited",
		Status: "Exited (1) seconds ago",
	}
	host := models.DockerHost{
		ID:          "host-1",
		DisplayName: "Docker Host",
		Hostname:    "docker.local",
		Containers:  []models.DockerContainer{exited},
	}
	// Check twice — the repeated observation mirrors the confirmation step
	// used by the other offline/state tests.
	for i := 0; i < 2; i++ {
		m.CheckDockerHost(host)
	}
	resourceID := dockerResourceID(host.ID, "container-1")
	wantSpec := resourceID + "-runtime-state"
	alert := activeAlert(t, m, "docker-container-state-"+resourceID)
	if got := alert.Metadata["canonicalAlertKind"]; got != "discrete-state" {
		t.Fatalf("canonicalAlertKind = %v, want discrete-state", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != wantSpec {
		t.Fatalf("canonicalSpecID = %v, want %s", got, wantSpec)
	}
}
// TestCheckDockerServiceAnnotatesCanonicalSpecMetadata verifies that a
// replicated service running fewer tasks than desired (2 of 4) raises a
// health alert with service-gap metadata.
func TestCheckDockerServiceAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	degraded := models.DockerService{
		ID:           "svc-1",
		Name:         "web",
		DesiredTasks: 4,
		RunningTasks: 2,
		Mode:         "replicated",
	}
	host := models.DockerHost{
		ID:          "host-1",
		DisplayName: "Prod Swarm",
		Hostname:    "swarm-prod",
		Services:    []models.DockerService{degraded},
	}
	m.CheckDockerHost(host)
	resourceID := dockerServiceResourceID(host.ID, "svc-1", "web")
	wantSpec := resourceID + "-service-gap"
	alert := activeAlert(t, m, "docker-service-health-"+resourceID)
	if got := alert.Metadata["canonicalAlertKind"]; got != "service-gap" {
		t.Fatalf("canonicalAlertKind = %v, want service-gap", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != wantSpec {
		t.Fatalf("canonicalSpecID = %v, want %s", got, wantSpec)
	}
}
// TestCheckDockerServiceUpdateStateAnnotatesCanonicalSpecMetadata verifies
// that a service whose rolling update ended in rollback_failed raises a
// health alert with discrete-state (update-state) metadata, even though all
// desired tasks are running.
func TestCheckDockerServiceUpdateStateAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	configureUnifiedEvalManager(t, m, unifiedEvalBaseConfig())
	now := time.Now()
	failedUpdate := &models.DockerServiceUpdate{
		State:       "rollback_failed",
		Message:     "Rollback failed",
		CompletedAt: &now,
	}
	host := models.DockerHost{
		ID:          "host-update",
		DisplayName: "Swarm",
		Hostname:    "swarm.local",
		Services: []models.DockerService{
			{
				ID:           "svc-update",
				Name:         "api",
				DesiredTasks: 1,
				RunningTasks: 1,
				UpdateStatus: failedUpdate,
			},
		},
	}
	m.CheckDockerHost(host)
	resourceID := dockerServiceResourceID(host.ID, "svc-update", "api")
	wantSpec := resourceID + "-update-state"
	alert := activeAlert(t, m, "docker-service-health-"+resourceID)
	if got := alert.Metadata["canonicalAlertKind"]; got != "discrete-state" {
		t.Fatalf("canonicalAlertKind = %v, want discrete-state", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != wantSpec {
		t.Fatalf("canonicalSpecID = %v, want %s", got, wantSpec)
	}
}
// TestCheckPMGQueueDepthAnnotatesCanonicalSpecMetadata verifies that the
// combined queue total across PMG nodes (300+250=550, above the 500 warning)
// raises an alert with severity-threshold metadata.
func TestCheckPMGQueueDepthAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	pmg := models.PMGInstance{
		ID:   "pmg-1",
		Name: "PMG 1",
		Nodes: []models.PMGNodeStatus{
			{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 300}},
			{Name: "node2", QueueStatus: &models.PMGQueueStatus{Total: 250}},
		},
	}
	m.checkPMGQueueDepths(pmg, PMGThresholdConfig{
		QueueTotalWarning:  500,
		QueueTotalCritical: 1000,
	})
	alert := activeAlert(t, m, "pmg-1-queue-total")
	if got := alert.Metadata["canonicalAlertKind"]; got != "severity-threshold" {
		t.Fatalf("canonicalAlertKind = %v, want severity-threshold", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "pmg-1-queue-total" {
		t.Fatalf("canonicalSpecID = %v, want pmg-1-queue-total", got)
	}
}
// TestCheckPMGOldestMessageAnnotatesCanonicalSpecMetadata verifies that a
// queue whose oldest message age (2400s = 40min) exceeds the 30-minute
// warning raises an alert with severity-threshold metadata.
func TestCheckPMGOldestMessageAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	pmg := models.PMGInstance{
		ID:   "pmg-1",
		Name: "PMG 1",
		Nodes: []models.PMGNodeStatus{
			{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 2400}},
		},
	}
	m.checkPMGOldestMessage(pmg, PMGThresholdConfig{
		OldestMessageWarnMins: 30,
		OldestMessageCritMins: 60,
	})
	alert := activeAlert(t, m, "pmg-1-oldest-message")
	if got := alert.Metadata["canonicalAlertKind"]; got != "severity-threshold" {
		t.Fatalf("canonicalAlertKind = %v, want severity-threshold", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "pmg-1-oldest-message" {
		t.Fatalf("canonicalSpecID = %v, want pmg-1-oldest-message", got)
	}
}
// TestCheckPMGNodeQueueAnnotatesCanonicalSpecMetadata verifies per-node queue
// checks: a single node's total of 80 against a warning of 100 still yields a
// node-scoped alert annotated with severity-threshold metadata — presumably
// per-node thresholds are derived from (scaled down from) the instance-wide
// ones; confirm against checkPMGNodeQueues.
func TestCheckPMGNodeQueueAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	pmg := models.PMGInstance{
		ID:   "pmg-1",
		Name: "PMG 1",
		Nodes: []models.PMGNodeStatus{
			{Name: "node-a", QueueStatus: &models.PMGQueueStatus{Total: 80}},
		},
	}
	m.checkPMGNodeQueues(pmg, PMGThresholdConfig{
		QueueTotalWarning:  100,
		QueueTotalCritical: 200,
	})
	alert := activeAlert(t, m, "pmg-1-node-a-queue-total")
	if got := alert.Metadata["canonicalAlertKind"]; got != "severity-threshold" {
		t.Fatalf("canonicalAlertKind = %v, want severity-threshold", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "pmg-1-node-a-queue-total" {
		t.Fatalf("canonicalSpecID = %v, want pmg-1-node-a-queue-total", got)
	}
}
// TestCheckPMGQuarantineAnnotatesCanonicalSpecMetadata verifies that a spam
// quarantine backlog (2500, above the 2000 warning) raises an alert with
// change-threshold metadata.
func TestCheckPMGQuarantineAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	pmg := models.PMGInstance{
		ID:   "pmg-1",
		Name: "PMG 1",
		Quarantine: &models.PMGQuarantineTotals{
			Spam: 2500,
		},
	}
	m.checkPMGQuarantineBacklog(pmg, PMGThresholdConfig{
		QuarantineSpamWarn:     2000,
		QuarantineSpamCritical: 5000,
	})
	alert := activeAlert(t, m, "pmg-1-quarantine-spam")
	if got := alert.Metadata["canonicalAlertKind"]; got != "change-threshold" {
		t.Fatalf("canonicalAlertKind = %v, want change-threshold", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "pmg-1-quarantine-spam" {
		t.Fatalf("canonicalSpecID = %v, want pmg-1-quarantine-spam", got)
	}
}
// TestCheckPMGAnomalyAnnotatesCanonicalSpecMetadata seeds a PMG mail-flow
// baseline tracker with steady history, feeds two sharply elevated spam-in
// readings, and verifies the resulting alert carries the canonical
// "baseline-anomaly" metadata.
func TestCheckPMGAnomalyAnnotatesCanonicalSpecMetadata(t *testing.T) {
	m := newTestManager(t)
	base := time.Now().Add(-13 * time.Hour)
	// Pre-populate 12 hourly samples with SpamIn steady at 100 so the
	// tracker has an established baseline before the anomalous points.
	tracker := &pmgAnomalyTracker{
		Samples:        make([]pmgMailMetricSample, 0, 12),
		LastSampleTime: base.Add(11 * time.Hour),
		SampleCount:    12,
	}
	for i := 0; i < 12; i++ {
		tracker.Samples = append(tracker.Samples, pmgMailMetricSample{
			SpamIn:    100,
			SpamOut:   10,
			VirusIn:   1,
			VirusOut:  1,
			Timestamp: base.Add(time.Duration(i) * time.Hour),
		})
	}
	m.mu.Lock()
	m.pmgAnomalyTrackers["pmg-1"] = tracker
	m.mu.Unlock()
	pmg := models.PMGInstance{
		ID:   "pmg-1",
		Name: "PMG 1",
		MailCount: []models.PMGMailCountPoint{
			{Timestamp: base.Add(12 * time.Hour), SpamIn: 420},
		},
	}
	// Two successive anomalous readings (420 then 430 against a ~100
	// baseline) — presumably the second call confirms the anomaly before
	// the alert fires; TODO confirm against checkPMGAnomalies.
	m.checkPMGAnomalies(pmg, PMGThresholdConfig{})
	pmg.MailCount = []models.PMGMailCountPoint{
		{Timestamp: base.Add(13 * time.Hour), SpamIn: 430},
	}
	m.checkPMGAnomalies(pmg, PMGThresholdConfig{})
	alert := activeAlert(t, m, "pmg-1-anomaly-spamIn")
	if got := alert.Metadata["canonicalAlertKind"]; got != "baseline-anomaly" {
		t.Fatalf("canonicalAlertKind = %v, want baseline-anomaly", got)
	}
	if got := alert.Metadata["canonicalSpecID"]; got != "pmg-1-anomaly-spamIn" {
		t.Fatalf("canonicalSpecID = %v, want pmg-1-anomaly-spamIn", got)
	}
}