package alerts

import (
	"encoding/json"
	"fmt"
	"math"
	"os"
	"path/filepath"
	"reflect"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/rcourtman/pulse-go-rewrite/internal/models"
	"github.com/rcourtman/pulse-go-rewrite/internal/utils"
	"github.com/rcourtman/pulse-go-rewrite/pkg/proxmox"
)

// testEnvMu protects concurrent access to PULSE_DATA_DIR during parallel tests.
// Tests using newTestManager are effectively serialized because the Manager
// calls GetDataDir() repeatedly (not just at creation time).
var testEnvMu sync.Mutex

// newTestManager creates a Manager with an isolated temp directory for testing.
// It uses os.Setenv with a mutex to safely handle parallel tests that call
// t.Parallel() before invoking this function (t.Setenv cannot be used after
// t.Parallel).
//
// IMPORTANT: The mutex is held for the entire duration of the test because the
// Manager calls GetDataDir() not just at creation time, but also during operations
// like SaveActiveAlerts() and LoadActiveAlerts(). This effectively serializes
// tests that use newTestManager, but ensures correct isolation.
func newTestManager(t *testing.T) *Manager {
	t.Helper()

	tmpDir := t.TempDir()

	testEnvMu.Lock()
	oldVal, hadOld := os.LookupEnv("PULSE_DATA_DIR")
	os.Setenv("PULSE_DATA_DIR", tmpDir)

	m := NewManager()

	// Restore the env var and release the mutex when the test completes.
	// We also stop the history manager's background goroutines (but not the
	// full manager Stop, which includes a 100ms sleep) to prevent writes to
	// the temp directory after the test completes.
	t.Cleanup(func() {
		// Stop the history manager to halt background save routines.
		m.historyManager.Stop()
		// Close the escalation channel to stop that goroutine too.
		select {
		case <-m.escalationStop:
			// Already closed
		default:
			close(m.escalationStop)
		}
		// Close the cleanup channel.
		select {
		case <-m.cleanupStop:
			// Already closed
		default:
			close(m.cleanupStop)
		}
		// Brief pause to let goroutines finish any in-flight operations.
		// Under heavy parallel test load (full suite), 10ms is not enough.
		time.Sleep(50 * time.Millisecond)

		if hadOld {
			os.Setenv("PULSE_DATA_DIR", oldVal)
		} else {
			os.Unsetenv("PULSE_DATA_DIR")
		}
		testEnvMu.Unlock()
	})

	return m
}

func TestAcknowledgePersistsThroughCheckMetric(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	// Set config fields directly to bypass UpdateConfig's default value enforcement
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.config.SuppressionWindow = 0
	m.config.MinimumDelta = 0
	m.mu.Unlock()

	threshold := &HysteresisThreshold{Trigger: 80, Clear: 70}
	m.checkMetric("res1", "Resource", "node1", "inst1", "guest", "usage", 90, threshold, nil)
	if _, exists := m.activeAlerts["res1-usage"]; !exists {
		t.Fatalf("expected alert to be created")
	}

	if err := m.AcknowledgeAlert("res1-usage", "tester"); err != nil {
		t.Fatalf("ack failed: %v", err)
	}

	if !m.activeAlerts["res1-usage"].Acknowledged {
		t.Fatalf("acknowledged flag not set")
	}

	alerts := m.GetActiveAlerts()
	if len(alerts) != 1 || !alerts[0].Acknowledged {
		t.Fatalf("GetActiveAlerts lost acknowledgement")
	}

	m.checkMetric("res1", "Resource", "node1", "inst1", "guest", "usage", 85, threshold, nil)
	if !m.activeAlerts["res1-usage"].Acknowledged {
		t.Fatalf("acknowledged flag lost after update")
	}
}

func TestCheckMetricClearsAlertWhenThresholdDisabled(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.config.SuppressionWindow = 0
	m.config.MinimumDelta = 0
	m.mu.Unlock()

	// First, create an active alert with an enabled threshold
	threshold := &HysteresisThreshold{Trigger: 80, Clear: 70}
	m.checkMetric("res1", "Resource", "node1", "inst1", "guest", "memory", 90, threshold, nil)

	m.mu.RLock()
	_, exists := m.activeAlerts["res1-memory"]
	m.mu.RUnlock()
	if !exists {
		t.Fatalf("expected alert to be created")
	}

	// Now call checkMetric with a disabled threshold (Trigger=0) — should clear the alert
	disabledThreshold := &HysteresisThreshold{Trigger: 0, Clear: 0}
	m.checkMetric("res1", "Resource", "node1", "inst1", "guest", "memory", 90, disabledThreshold, nil)

	m.mu.RLock()
	_, stillExists := m.activeAlerts["res1-memory"]
	m.mu.RUnlock()
	if stillExists {
		t.Errorf("expected alert to be cleared when threshold is disabled (Trigger=0)")
	}

	// Also test with nil threshold
	// Re-create the alert
	m.checkMetric("res1", "Resource", "node1", "inst1", "guest", "memory", 90, threshold, nil)
	m.mu.RLock()
	_, exists = m.activeAlerts["res1-memory"]
	m.mu.RUnlock()
	if !exists {
		t.Fatalf("expected alert to be re-created")
	}

	// Call with nil threshold — should also clear
	m.checkMetric("res1", "Resource", "node1", "inst1", "guest", "memory", 90, nil, nil)

	m.mu.RLock()
	_, stillExists = m.activeAlerts["res1-memory"]
	m.mu.RUnlock()
	if stillExists {
		t.Errorf("expected alert to be cleared when threshold is nil")
	}
}

func TestGetActiveAlertsKeepsInstanceScopedNodeDisplayNames(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.config.SuppressionWindow = 0
	m.config.MinimumDelta = 0
	m.mu.Unlock()

	threshold := &HysteresisThreshold{Trigger: 80, Clear: 70}

	m.UpdateNodeDisplayName("cluster-a", "pve", "Alpha")
	m.UpdateNodeDisplayName("cluster-b", "pve", "Beta")

	m.checkMetric("guest-a", "vm-a", "pve", "cluster-a", "guest", "cpu", 90, threshold, nil)
	m.checkMetric("guest-b", "vm-b", "pve", "cluster-b", "guest", "cpu", 91, threshold, nil)

	m.UpdateNodeDisplayName("cluster-a", "pve", "Alpha Updated")
	m.checkMetric("guest-a", "vm-a", "pve", "cluster-a", "guest", "cpu", 92, threshold, nil)
	m.checkMetric("guest-b", "vm-b", "pve", "cluster-b", "guest", "cpu", 93, threshold, nil)

	gotByID := make(map[string]Alert)
	for _, alert := range m.GetActiveAlerts() {
		gotByID[alert.ID] = alert
	}

	if got := gotByID["guest-a-cpu"].NodeDisplayName; got != "Alpha Updated" {
		t.Fatalf("guest-a NodeDisplayName = %q, want %q", got, "Alpha Updated")
	}
	if got := gotByID["guest-b-cpu"].NodeDisplayName; got != "Beta" {
		t.Fatalf("guest-b NodeDisplayName = %q, want %q", got, "Beta")
	}
}

func TestCheckGuestSkipsAlertsWhenMetricDisabled(t *testing.T) {
	m := newTestManager(t)

	vmID := "instance-node-101"
	instanceName := "instance"

	// Start with default configuration to allow CPU alerts.
	initialConfig := AlertConfig{
		Enabled: true,
		GuestDefaults: ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
		},
		TimeThreshold:  0,
		TimeThresholds: map[string]int{},
		NodeDefaults: ThresholdConfig{
			CPU:    &HysteresisThreshold{Trigger: 80, Clear: 75},
			Memory: &HysteresisThreshold{Trigger: 85, Clear: 80},
			Disk:   &HysteresisThreshold{Trigger: 90, Clear: 85},
		},
		StorageDefault: HysteresisThreshold{Trigger: 85, Clear: 80},
		Overrides:      make(map[string]ThresholdConfig),
	}
	m.UpdateConfig(initialConfig)
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.config.ActivationState = ActivationActive
	m.mu.Unlock()

	var dispatched []*Alert
	done := make(chan struct{}, 1)
	var resolved []string
	resolvedDone := make(chan struct{}, 1)
	m.SetAlertCallback(func(alert *Alert) {
		dispatched = append(dispatched, alert)
		select {
		case done <- struct{}{}:
		default:
		}
	})
	m.SetResolvedCallback(func(alertID string) {
		resolved = append(resolved, alertID)
		select {
		case resolvedDone <- struct{}{}:
		default:
		}
	})

	vm := models.VM{
		ID:       vmID,
		Name:     "test-vm",
		Node:     "node",
		Instance: instanceName,
		Status:   "running",
		CPU:      1.0, // 100% once multiplied by 100 inside CheckGuest
		Memory: models.Memory{
			Usage: 65,
		},
		Disk: models.Disk{
			Usage: 40,
		},
	}

	// Initial check should trigger an alert with default thresholds.
	m.CheckGuest(vm, instanceName)
	select {
	case <-done:
	case <-time.After(100 * time.Millisecond):
		t.Fatalf("did not receive initial alert dispatch")
	}
	if len(dispatched) != 1 {
		t.Fatalf("expected 1 alert before disabling metric, got %d", len(dispatched))
	}

	// Apply override disabling CPU alerts for this VM.
	disabledConfig := initialConfig
	disabledConfig.Overrides = map[string]ThresholdConfig{
		vmID: {
			CPU: &HysteresisThreshold{Trigger: -1, Clear: 0},
		},
	}
	disabledConfig.TimeThreshold = 0
	disabledConfig.TimeThresholds = map[string]int{}
	m.UpdateConfig(disabledConfig)
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.config.ActivationState = ActivationActive
	m.mu.Unlock()

	// Clear dispatched slice to capture only post-disable notifications.
	dispatched = dispatched[:0]
	done = make(chan struct{}, 1)

	// Re-run evaluation with high CPU; no alert should be dispatched.
	m.CheckGuest(vm, instanceName)
	select {
	case <-done:
		t.Fatalf("expected no alerts after disabling CPU metric, but callback fired")
	case <-time.After(100 * time.Millisecond):
		// No callback fired, as expected.
	}

	// Active alerts should be cleared by the config update.
	m.mu.RLock()
	activeCount := len(m.activeAlerts)
	m.mu.RUnlock()
	if activeCount != 0 {
		t.Fatalf("expected active alerts to be cleared after disabling metric, got %d", activeCount)
	}

	select {
	case <-resolvedDone:
	case <-time.After(100 * time.Millisecond):
		t.Fatalf("expected resolved callback to fire after disabling metric")
	}
	if len(resolved) != 1 || resolved[0] != fmt.Sprintf("%s-cpu", vmID) {
		t.Fatalf("expected resolved callback for %s-cpu, got %v", vmID, resolved)
	}

	m.mu.RLock()
	_, isPending := m.pendingAlerts[fmt.Sprintf("%s-cpu", vmID)]
	m.mu.RUnlock()
	if isPending {
		t.Fatalf("expected pending alert entry to be cleared after disabling metric")
	}
}

func TestPulseNoAlertsSuppressesGuestAlerts(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.config.ActivationState = ActivationActive
	m.mu.Unlock()

	var dispatched int
	m.SetAlertCallback(func(alert *Alert) {
		dispatched++
	})

	vm := models.VM{
		ID:       "inst/qemu/101",
		Name:     "test-vm",
		Node:     "node1",
		Instance: "inst",
		Status:   "running",
		CPU:      1.0,
		Memory: models.Memory{
			Usage: 95,
		},
		Disk: models.Disk{
			Usage: 95,
		},
		Tags: []string{"pulse-no-alerts"},
	}

	m.CheckGuest(vm, "inst")

	if dispatched != 0 {
		t.Fatalf("expected no alert dispatch, got %d", dispatched)
	}

	if alerts := m.GetActiveAlerts(); len(alerts) != 0 {
		t.Fatalf("expected no active alerts, got %d", len(alerts))
	}
}

func TestPulseMonitorOnlySkipsDispatchButRetainsAlert(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.config.ActivationState = ActivationActive
	m.mu.Unlock()

	var dispatched int
	m.SetAlertCallback(func(alert *Alert) {
		dispatched++
	})

	vm := models.VM{
		ID:       "inst/qemu/102",
		Name:     "monitor-vm",
		Node:     "node1",
		Instance: "inst",
		Status:   "running",
		CPU:      1.0,
		Memory:   models.Memory{Usage: 90},
		Disk:     models.Disk{Usage: 50},
		Tags:     []string{"pulse-monitor-only"},
	}

	m.CheckGuest(vm, "inst")

	if dispatched != 0 {
		t.Fatalf("expected monitor-only alert to skip dispatch, got %d callbacks", dispatched)
	}

	alerts := m.GetActiveAlerts()
	if len(alerts) == 0 {
		t.Fatalf("expected monitor-only alert to remain active")
	}

	if alerts[0].Metadata == nil || alerts[0].Metadata["monitorOnly"] != true {
		t.Fatalf("expected alert metadata to mark monitorOnly, got %+v", alerts[0].Metadata)
	}
}

func TestPulseRelaxedThresholdsIncreaseCpuTrigger(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	vm := models.VM{
		ID:       "inst/qemu/103",
		Name:     "relaxed-vm",
		Node:     "node1",
		Instance: "inst",
		Status:   "running",
		CPU:      0.9, // 90%
		Memory:   models.Memory{Usage: 60},
		Disk:     models.Disk{Usage: 40},
		Tags:     []string{"pulse-relaxed"},
	}

	m.CheckGuest(vm, "inst")

	if alerts := m.GetActiveAlerts(); len(alerts) != 0 {
		t.Fatalf("expected no alerts at 90%% CPU with relaxed thresholds, got %d", len(alerts))
	}

	vm.CPU = 1.0
	m.CheckGuest(vm, "inst")

	if alerts := m.GetActiveAlerts(); len(alerts) == 0 {
		t.Fatalf("expected alert once CPU exceeds relaxed threshold")
	}
}

func TestClearAlertMarksResolutionAndReturnsStatus(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	vm := models.VM{
		ID:       "inst/qemu/104",
		Name:     "clear-vm",
		Node:     "node1",
		Instance: "inst",
		Status:   "running",
		CPU:      1.0,
		Memory:   models.Memory{Usage: 80},
		Disk:     models.Disk{Usage: 80},
	}

	m.CheckGuest(vm, "inst")
	alerts := m.GetActiveAlerts()
	if len(alerts) == 0 {
		t.Fatalf("expected alert to be active before clearing")
	}

	alertID := alerts[0].ID
	if ok := m.ClearAlert(alertID); !ok {
		t.Fatalf("expected manual clear to succeed")
	}

	if remaining := m.GetActiveAlerts(); len(remaining) != 0 {
		t.Fatalf("expected no active alerts after clear, found %d", len(remaining))
	}

	resolved := m.GetRecentlyResolved()
	if len(resolved) == 0 || resolved[0].Alert.ID != alertID {
		t.Fatalf("expected alert %s to be tracked as recently resolved", alertID)
	}

	if ok := m.ClearAlert(alertID); ok {
		t.Fatalf("expected second clear to report missing alert")
	}
}

func TestHandleDockerHostRemovedClearsAlertsAndTracking(t *testing.T) {
	m := newTestManager(t)
	host := models.DockerHost{ID: "host1", DisplayName: "Host One", Hostname: "host-one"}
	containerResourceID := "docker:host1/container1"
	containerAlertID := "docker-container-state-" + containerResourceID
	hostAlertID := "docker-host-offline-host1"

	m.mu.Lock()
	m.activeAlerts[hostAlertID] = &Alert{ID: hostAlertID, ResourceID: "docker:host1"}
	m.activeAlerts[containerAlertID] = &Alert{ID: containerAlertID, ResourceID: containerResourceID}
	m.dockerOfflineCount[host.ID] = 2
	m.dockerStateConfirm[containerResourceID] = 1
	m.dockerRestartTracking[containerResourceID] = &dockerRestartRecord{}
	m.dockerLastExitCode[containerResourceID] = 137
	m.mu.Unlock()

	m.HandleDockerHostRemoved(host)

	m.mu.RLock()
	defer m.mu.RUnlock()

	if _, exists := m.activeAlerts[containerAlertID]; exists {
		t.Fatalf("expected container alerts to be cleared")
	}
	if _, exists := m.activeAlerts[hostAlertID]; exists {
		t.Fatalf("expected host offline alert to be cleared")
	}
	if _, exists := m.dockerOfflineCount[host.ID]; exists {
		t.Fatalf("expected offline tracking to be cleared")
	}
	if _, exists := m.dockerStateConfirm[containerResourceID]; exists {
		t.Fatalf("expected state confirmation to be cleared")
	}
	if _, exists := m.dockerRestartTracking[containerResourceID]; exists {
		t.Fatalf("expected restart tracking to be cleared")
	}
	if _, exists := m.dockerLastExitCode[containerResourceID]; exists {
		t.Fatalf("expected last exit code tracking to be cleared")
	}
}

func TestCheckHostGeneratesMetricAlerts(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	host := models.Host{
		ID:          "host-1",
		DisplayName: "Test Host",
		Hostname:    "host-1.example",
		Platform:    "linux",
		OSName:      "ubuntu",
		CPUUsage:    95,
		CPUCount:    8,
		Memory: models.Memory{
			Usage: 92,
			Total: 16384,
			Used:  15000,
			Free:  1384,
		},
		Disks: []models.Disk{
			{
				Mountpoint: "/",
				Usage:      93,
				Total:      100,
				Used:       93,
				Free:       7,
			},
		},
		Status:          "online",
		IntervalSeconds: 30,
		LastSeen:        time.Now(),
		Tags:            []string{"prod"},
	}

	m.CheckHost(host)

	m.mu.RLock()
	defer m.mu.RUnlock()

	cpuAlertID := fmt.Sprintf("%s-cpu", hostResourceID(host.ID))
	if _, exists := m.activeAlerts[cpuAlertID]; !exists {
		t.Fatalf("expected CPU alert %q to be active", cpuAlertID)
	}

	memAlertID := fmt.Sprintf("%s-memory", hostResourceID(host.ID))
	if _, exists := m.activeAlerts[memAlertID]; !exists {
		t.Fatalf("expected memory alert %q to be active", memAlertID)
	}

	diskResourceID, _ := hostDiskResourceID(host, host.Disks[0])
	diskAlertID := fmt.Sprintf("%s-disk", diskResourceID)
	if _, exists := m.activeAlerts[diskAlertID]; !exists {
		t.Fatalf("expected disk alert %q to be active", diskAlertID)
	}
}

func TestHandleHostOfflineRequiresConfirmations(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	host := models.Host{ID: "host-2", DisplayName: "Second Host", Hostname: "host-two"}
	alertID := fmt.Sprintf("host-offline-%s", host.ID)
	resourceKey := hostResourceID(host.ID)

	m.HandleHostOffline(host)
	m.mu.RLock()
	if _, exists := m.activeAlerts[alertID]; exists {
		t.Fatalf("expected no alert after first offline detection")
	}
	if count := m.offlineConfirmations[resourceKey]; count != 1 {
		t.Fatalf("expected confirmation count to be 1, got %d", count)
	}
	m.mu.RUnlock()

	m.HandleHostOffline(host)
	m.mu.RLock()
	if _, exists := m.activeAlerts[alertID]; exists {
		t.Fatalf("expected no alert after second offline detection")
	}
	if count := m.offlineConfirmations[resourceKey]; count != 2 {
		t.Fatalf("expected confirmation count to be 2, got %d", count)
	}
	m.mu.RUnlock()

	m.HandleHostOffline(host)
	m.mu.RLock()
	if _, exists := m.activeAlerts[alertID]; !exists {
		t.Fatalf("expected alert %q after third offline detection", alertID)
	}
	m.mu.RUnlock()

	m.HandleHostOnline(host)
	m.mu.RLock()
	if _, exists := m.activeAlerts[alertID]; exists {
		t.Fatalf("expected offline alert %q to be cleared after host online", alertID)
	}
	if _, exists := m.offlineConfirmations[resourceKey]; exists {
		t.Fatalf("expected offline confirmations to be cleared when host online")
	}
	m.mu.RUnlock()
}

func TestCheckHostDisabledOverrideClearsAlerts(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	host := models.Host{
		ID:          "host-3",
		DisplayName: "Override Host",
		Hostname:    "override.example",
		CPUUsage:    90,
		Memory: models.Memory{
			Usage: 91,
			Total: 16000,
			Used:  14560,
			Free:  1440,
		},
		Disks: []models.Disk{
			{Mountpoint: "/data", Usage: 92, Total: 200, Used: 184, Free: 16},
		},
		Status:          "online",
		IntervalSeconds: 30,
		LastSeen:        time.Now(),
	}

	m.CheckHost(host)

	m.mu.RLock()
	if len(m.activeAlerts) == 0 {
		m.mu.RUnlock()
		t.Fatalf("expected active alerts prior to disabling host overrides")
	}
	m.mu.RUnlock()

	cfg := m.GetConfig()
	cfg.Overrides = map[string]ThresholdConfig{
		host.ID: {
			Disabled: true,
		},
	}
	m.UpdateConfig(cfg)
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	m.CheckHost(host)

	m.mu.RLock()
	defer m.mu.RUnlock()
	if len(m.activeAlerts) != 0 {
		t.Fatalf("expected all host alerts to be cleared after disabling override, got %d", len(m.activeAlerts))
	}
}

func TestCheckSnapshotsForInstanceCreatesAndClearsAlerts(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	cfg := AlertConfig{
		Enabled:        true,
		StorageDefault: HysteresisThreshold{Trigger: 85, Clear: 80},
		SnapshotDefaults: SnapshotAlertConfig{
			Enabled:         true,
			WarningDays:     7,
			CriticalDays:    14,
			WarningSizeGiB:  0,
			CriticalSizeGiB: 0,
		},
		Overrides: make(map[string]ThresholdConfig),
	}
	m.UpdateConfig(cfg)
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	now := time.Now()
	snapshots := []models.GuestSnapshot{
		{
			ID:        "inst-node-100-weekly",
			Name:      "weekly",
			Node:      "node",
			Instance:  "inst",
			Type:      "qemu",
			VMID:      100,
			Time:      now.Add(-15 * 24 * time.Hour),
			SizeBytes: 60 << 30,
		},
	}
	guestNames := map[string]string{
		"inst:node:100": "app-server",
	}

	m.CheckSnapshotsForInstance("inst", snapshots, guestNames)

	m.mu.RLock()
	alert, exists := m.activeAlerts["snapshot-age-inst-node-100-weekly"]
	m.mu.RUnlock()
	if !exists {
		t.Fatalf("expected snapshot age alert to be created")
	}
	if alert.Level != AlertLevelCritical {
		t.Fatalf("expected critical level for old snapshot, got %s", alert.Level)
	}
	if alert.ResourceName != "app-server snapshot 'weekly'" {
		t.Fatalf("unexpected resource name: %s", alert.ResourceName)
	}

	m.CheckSnapshotsForInstance("inst", nil, guestNames)

	m.mu.RLock()
	_, exists = m.activeAlerts["snapshot-age-inst-node-100-weekly"]
	m.mu.RUnlock()
	if exists {
		t.Fatalf("expected snapshot alert to be cleared when snapshot missing")
	}
}

func TestCheckSnapshotsRespectsOverrides(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	cfg := AlertConfig{
		Enabled: true,
		SnapshotDefaults: SnapshotAlertConfig{
			Enabled:      true,
			WarningDays:  7,
			CriticalDays: 14,
		},
	}
	m.UpdateConfig(cfg)
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.mu.Unlock()

	now := time.Now()
	snapshots := []models.GuestSnapshot{
		{
			ID:       "inst:node:100:weekly",
			Name:     "weekly",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     100,
			Time:     now.Add(-10 * 24 * time.Hour), // Triggers Warning (10 > 7)
		},
	}
	resourceKey := "inst:node:100"
	guestNames := map[string]string{
		resourceKey: "app-server",
	}

	// 1. Verify warning alert is created
	m.CheckSnapshotsForInstance("inst", snapshots, guestNames)
	m.mu.RLock()
	alert, exists := m.activeAlerts["snapshot-age-inst:node:100:weekly"]
	m.mu.RUnlock()
	if !exists {
		t.Fatalf("expected snapshot warning alert")
	}
	if alert.Level != AlertLevelWarning {
		t.Fatalf("expected warning alert, got %s", alert.Level)
	}

	// 2. Disable via override
	cfg = m.GetConfig()
	cfg.Overrides = map[string]ThresholdConfig{
		"inst:node:100": {
			Snapshot: &SnapshotAlertConfig{Enabled: false},
		},
	}
	m.UpdateConfig(cfg)
	m.CheckSnapshotsForInstance("inst", snapshots, guestNames)
	m.mu.RLock()
	_, exists = m.activeAlerts["snapshot-age-inst:node:100:weekly"]
	m.mu.RUnlock()
	if exists {
		t.Fatalf("expected snapshot alert to be suppressed by override")
	}
}

func TestCheckSnapshotsForInstanceTriggersOnSnapshotSize(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	cfg := AlertConfig{
		Enabled:        true,
		StorageDefault: HysteresisThreshold{Trigger: 85, Clear: 80},
		SnapshotDefaults: SnapshotAlertConfig{
			Enabled:         true,
			WarningDays:     0,
			CriticalDays:    0,
			WarningSizeGiB:  50,
			CriticalSizeGiB: 100,
		},
		Overrides: make(map[string]ThresholdConfig),
	}
	m.UpdateConfig(cfg)
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	now := time.Now()
	snapshots := []models.GuestSnapshot{
		{
			ID:        "inst-node-200-sizey",
			Name:      "pre-maintenance",
			Node:      "node",
			Instance:  "inst",
			Type:      "qemu",
			VMID:      200,
			Time:      now.Add(-2 * time.Hour),
			SizeBytes: int64(120) << 30,
		},
	}
	guestNames := map[string]string{
		"inst:node:200": "db-server",
	}

	m.CheckSnapshotsForInstance("inst", snapshots, guestNames)

	m.mu.RLock()
	alert, exists := m.activeAlerts["snapshot-age-inst-node-200-sizey"]
	m.mu.RUnlock()
	if !exists {
		t.Fatalf("expected snapshot size alert to be created")
	}
	if alert.Level != AlertLevelCritical {
		t.Fatalf("expected critical level for large snapshot, got %s", alert.Level)
	}
	if alert.Value < 119.5 || alert.Value > 120.5 {
		t.Fatalf("expected alert value near 120 GiB, got %.2f", alert.Value)
	}
	if alert.Threshold != 100 {
		t.Fatalf("expected threshold 100 GiB, got %.2f", alert.Threshold)
	}
	if alert.Metadata == nil {
		t.Fatalf("expected metadata for snapshot alert")
	}
	if metric, ok := alert.Metadata["primaryMetric"].(string); !ok || metric != "size" {
		t.Fatalf("expected primary metric size, got %#v", alert.Metadata["primaryMetric"])
	}
	if sizeBytes, ok := alert.Metadata["snapshotSizeBytes"].(int64); !ok || sizeBytes == 0 {
		t.Fatalf("expected snapshotSizeBytes in metadata")
	}
	metrics, ok := alert.Metadata["triggeredMetrics"].([]string)
	if !ok {
		t.Fatalf("expected triggeredMetrics slice, got %#v", alert.Metadata["triggeredMetrics"])
	}
	foundSize := false
	for _, metric := range metrics {
		if metric == "size" {
			foundSize = true
			break
		}
	}
	if !foundSize {
		t.Fatalf("expected size metric recorded in metadata")
	}
}

func TestCheckSnapshotsForInstanceIncludesAgeAndSizeReasons(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	cfg := AlertConfig{
		Enabled:        true,
		StorageDefault: HysteresisThreshold{Trigger: 85, Clear: 80},
		SnapshotDefaults: SnapshotAlertConfig{
			Enabled:         true,
			WarningDays:     5,
			CriticalDays:    10,
			WarningSizeGiB:  40,
			CriticalSizeGiB: 80,
		},
		Overrides: make(map[string]ThresholdConfig),
	}
	m.UpdateConfig(cfg)
	m.mu.Lock()
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	now := time.Now()
	snapshots := []models.GuestSnapshot{
		{
			ID:        "inst-node-300-combined",
			Name:      "long-running",
			Node:      "node",
			Instance:  "inst",
			Type:      "qemu",
			VMID:      300,
			Time:      now.Add(-15 * 24 * time.Hour),
			SizeBytes: int64(90) << 30,
		},
	}
	guestNames := map[string]string{
		"inst:node:300": "app-server",
	}

	m.CheckSnapshotsForInstance("inst", snapshots, guestNames)

	m.mu.RLock()
	alert, exists := m.activeAlerts["snapshot-age-inst-node-300-combined"]
	m.mu.RUnlock()
	if !exists {
		t.Fatalf("expected combined snapshot alert to be created")
	}
	if alert.Level != AlertLevelCritical {
		t.Fatalf("expected critical level, got %s", alert.Level)
	}
	if !strings.Contains(alert.Message, "days old") || !strings.Contains(strings.ToLower(alert.Message), "gib") {
		t.Fatalf("expected alert message to reference age and size, got %q", alert.Message)
	}
	if alert.Metadata == nil {
		t.Fatalf("expected metadata for combined alert")
	}
	metrics, ok := alert.Metadata["triggeredMetrics"].([]string)
	if !ok {
		t.Fatalf("expected triggeredMetrics slice, got %#v", alert.Metadata["triggeredMetrics"])
	}
	if len(metrics) < 2 {
		t.Fatalf("expected both age and size metrics recorded, got %v", metrics)
	}
	if metric, ok := alert.Metadata["primaryMetric"].(string); !ok || metric != "age" {
		t.Fatalf("expected primary metric age, got %#v", alert.Metadata["primaryMetric"])
	}
}

func TestCheckBackupsCreatesAndClearsAlerts(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:      true,
		WarningDays:  7,
		CriticalDays: 14,
	}
	m.config.TimeThreshold = 0
	m.config.TimeThresholds = map[string]int{}
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-100-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     100,
			Time:     now.Add(-15 * 24 * time.Hour),
		},
	}

	key := BuildGuestKey("inst", "node", 100)
	guestsByKey := map[string]GuestLookup{
		key: {
			ResourceID: "qemu/100",
			Name:       "app-server",
			Instance:   "inst",
			Node:       "node",
			Type:       "qemu",
			VMID:       100,
		},
	}
	guestsByVMID := map[string][]GuestLookup{
		"100": {guestsByKey[key]},
	}

	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	alert, exists := m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
	m.mu.RUnlock()
	if !exists {
		t.Fatalf("expected backup age alert to be created")
	}
	if alert.Level != AlertLevelCritical {
		t.Fatalf("expected critical backup alert, got %s", alert.Level)
	}

	// Recent backup clears alert
	storageBackups[0].Time = now
	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	_, exists = m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
	m.mu.RUnlock()
	if exists {
		t.Fatalf("expected backup-age alert to clear after fresh backup")
	}
}

func TestCheckBackupsRespectsOverrides(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:      true,
		WarningDays:  7,
		CriticalDays: 14,
	}
	m.config.TimeThreshold = 0
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-100-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     100,
			Time:     now.Add(-10 * 24 * time.Hour), // Triggers Warning (10 > 7)
		},
	}

	key := BuildGuestKey("inst", "node", 100)
	resourceID := "inst:node:100"
	guestsByKey := map[string]GuestLookup{
		key: {
			ResourceID: resourceID,
			Name:       "app-server",
			Instance:   "inst",
			Node:       "node",
			Type:       "qemu",
			VMID:       100,
		},
	}
	guestsByVMID := map[string][]GuestLookup{
		"100": {guestsByKey[key]},
	}

	// 1. Verify warning alert is created with defaults
	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
	m.mu.RLock()
	alert, exists := m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
	m.mu.RUnlock()
	if !exists {
		t.Fatalf("expected backup warning alert")
	}
	if alert.Level != AlertLevelWarning {
		t.Fatalf("expected warning alert, got %s", alert.Level)
	}

	// 2. Apply override to disable backup alerts for this guest
	cfg := m.GetConfig()
	cfg.Overrides = map[string]ThresholdConfig{
		resourceID: {
			Backup: &BackupAlertConfig{Enabled: false},
		},
	}
	m.UpdateConfig(cfg)

	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
	m.mu.RLock()
	_, exists = m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
	m.mu.RUnlock()
	if exists {
		t.Fatalf("expected backup alert to be cleared/suppressed by override")
	}

	// 3. Apply override to change thresholds
	cfg.Overrides[resourceID] = ThresholdConfig{
		Backup: &BackupAlertConfig{
			Enabled:      true,
			WarningDays:  15, // 10 < 15, so no alert
			CriticalDays: 20,
		},
	}
	m.UpdateConfig(cfg)
	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
	m.mu.RLock()
	_, exists = m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
	m.mu.RUnlock()
	if exists {
		t.Fatalf("expected no backup alert with increased thresholds in override")
	}

	// 4. Test global guest disable
	cfg.Overrides[resourceID] = ThresholdConfig{
		Disabled: true,
	}
	m.UpdateConfig(cfg)
	storageBackups[0].Time = now.Add(-30 * 24 * time.Hour) // Way past defaults
	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
	m.mu.RLock()
	_, exists = m.activeAlerts["backup-age-"+sanitizeAlertKey(key)]
	m.mu.RUnlock()
	if exists {
		t.Fatalf("expected no backup alert for globally disabled guest")
	}
}

func TestCheckBackupsHandlesPbsOnlyGuests(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:      true,
		WarningDays:  3,
		CriticalDays: 5,
	}
	m.mu.Unlock()

	now := time.Now()
	pbsBackups := []models.PBSBackup{
		{
			ID:         "pbs-backup-999-0",
			Instance:   "pbs-main",
			Datastore:  "backup-store",
			BackupType: "qemu",
			VMID:       "999",
			BackupTime: now.Add(-6 * 24 * time.Hour),
		},
	}

	// Include a live sentinel guest so hasLiveInventory is true and orphan detection runs.
	sentinelKey := BuildGuestKey("inst", "snode", 9999)
	guestsByKey := map[string]GuestLookup{
		sentinelKey: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "inst", Node: "snode", Type: "qemu", VMID: 9999},
	}
	guestsByVMID := map[string][]GuestLookup{
		"9999": {guestsByKey[sentinelKey]},
	}

	m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	found := false
	for id, alert := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			found = true
			if alert.Level != AlertLevelWarning {
				t.Fatalf("expected PBS orphaned backup alert to be warning, got %s", alert.Level)
			}
			break
		}
	}
	m.mu.RUnlock()
	if !found {
		t.Fatalf("expected PBS orphaned backup alert to be created")
	}
}

func TestCheckBackupsDisambiguatesWithNamespace(t *testing.T) {
	// Test that when multiple guests have the same VMID from different instances,
	// the namespace is used to match the backup to the correct guest.
	// This addresses issue #1095, where users have multiple PVE instances with
	// overlapping VMIDs and separate PBS instances backing them up.
	m := newTestManager(t)
	m.ClearActiveAlerts()

	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:      true,
		WarningDays:  3,
		CriticalDays: 5,
	}
	m.mu.Unlock()

	now := time.Now()

	// Two guests with the same VMID (100) but on different instances
	guestsByKey := map[string]GuestLookup{
		"pve-node1-100": {
			ResourceID: "qemu/100",
			Name:       "webserver-pve",
			Instance:   "pve",
			Node:       "node1",
			Type:       "qemu",
			VMID:       100,
		},
		"pve-nat-node2-100": {
			ResourceID: "qemu/100",
			Name:       "webserver-nat",
			Instance:   "pve-nat",
			Node:       "node2",
			Type:       "qemu",
			VMID:       100,
		},
	}

	// Both guests have VMID "100"
	guestsByVMID := map[string][]GuestLookup{
		"100": {
			guestsByKey["pve-node1-100"],
			guestsByKey["pve-nat-node2-100"],
		},
	}

	// PBS backup with namespace "nat" should match the "pve-nat" instance
	pbsBackups := []models.PBSBackup{
		{
			ID:         "pbs-backup-100-nat",
			Instance:   "pbs-main",
			Datastore:  "backup-store",
			Namespace:  "nat", // This namespace should match "pve-nat"
			BackupType: "qemu",
			VMID:       "100",
			BackupTime: now.Add(-6 * 24 * time.Hour), // Critical
		},
	}

	m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()

	// Should find an alert keyed to the pve-nat instance (node2), not pve (node1)
	expectedKey := "backup-age-pve-nat-node2-100"
	alert, exists := m.activeAlerts[expectedKey]
	if !exists {
		// List what keys we do have for debugging
		var keys []string
		for k := range m.activeAlerts {
			keys = append(keys, k)
		}
		t.Fatalf("expected alert with key %q not found; found keys: %v", expectedKey, keys)
	}

	if alert.ResourceName != "webserver-nat backup" {
		t.Errorf("expected ResourceName 'webserver-nat backup', got %q", alert.ResourceName)
	}
	if alert.Instance != "pve-nat" {
		t.Errorf("expected Instance 'pve-nat', got %q", alert.Instance)
	}
}

// TestCheckBackupsVMIDCollisionNonMatchingNamespace verifies that when multiple guests
// share a VMID and the PBS backup namespace matches none of them, the alert uses the
// generic PBS key rather than falsely attributing it to a specific guest.
func TestCheckBackupsVMIDCollisionNonMatchingNamespace(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:      true,
		WarningDays:  3,
		CriticalDays: 5,
	}
	m.mu.Unlock()

	now := time.Now()

	guestsByKey := map[string]GuestLookup{
		"pve1-node1-100": {
			ResourceID: "qemu/100",
			Name:       "vm-pve1",
			Instance:   "pve1",
			Node:       "node1",
			Type:       "qemu",
			VMID:       100,
		},
		"pve2-node2-100": {
			ResourceID: "qemu/100",
			Name:       "vm-pve2",
			Instance:   "pve2",
			Node:       "node2",
			Type:       "qemu",
			VMID:       100,
		},
	}

	guestsByVMID := map[string][]GuestLookup{
		"100": {
			guestsByKey["pve1-node1-100"],
			guestsByKey["pve2-node2-100"],
		},
	}

	// PBS backup with namespace "staging" — matches neither pve1 nor pve2
	pbsBackups := []models.PBSBackup{
		{
			ID:         "pbs-100",
			Instance:   "pbs-main",
			Datastore:  "backup-store",
			Namespace:  "staging",
			BackupType: "qemu",
			VMID:       "100",
			BackupTime: now.Add(-6 * 24 * time.Hour),
		},
	}

	m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()

	// Should NOT have a guest-specific alert key
	for key := range m.activeAlerts {
		if key == "backup-age-pve1-node1-100" || key == "backup-age-pve2-node2-100" {
			t.Errorf("should not attribute ambiguous backup to a specific guest, but found key %q", key)
		}
	}

	// Should have a generic PBS alert key
	expectedKey := "backup-age-pbs-pbs-main-qemu-100"
	if _, exists := m.activeAlerts[expectedKey]; !exists {
		var keys []string
		for k := range m.activeAlerts {
			keys = append(keys, k)
		}
		t.Errorf("expected generic PBS alert key %q, found keys: %v", expectedKey, keys)
	}
}

// TestCheckBackupsVMIDCollisionNoNamespace verifies that when multiple guests
// share a VMID and the PBS backup has no namespace, the alert uses the generic PBS key.
func TestCheckBackupsVMIDCollisionNoNamespace(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:      true,
		WarningDays:  3,
		CriticalDays: 5,
	}
	m.mu.Unlock()

	now := time.Now()

	guestsByKey := map[string]GuestLookup{
		"pve1-node1-100": {
			ResourceID: "qemu/100",
			Name:       "vm-pve1",
			Instance:   "pve1",
			Node:       "node1",
			Type:       "qemu",
			VMID:       100,
		},
		"pve2-node2-100": {
			ResourceID: "qemu/100",
			Name:       "vm-pve2",
			Instance:   "pve2",
			Node:       "node2",
			Type:       "qemu",
			VMID:       100,
		},
	}

	guestsByVMID := map[string][]GuestLookup{
		"100": {
			guestsByKey["pve1-node1-100"],
			guestsByKey["pve2-node2-100"],
		},
	}

	// PBS backup with NO namespace
	pbsBackups := []models.PBSBackup{
		{
			ID:         "pbs-100",
			Instance:   "pbs-main",
			Datastore:  "backup-store",
			Namespace:  "",
			BackupType: "qemu",
			VMID:       "100",
			BackupTime: now.Add(-6 * 24 * time.Hour),
		},
	}

	m.CheckBackups(nil, pbsBackups, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()

	// Should NOT have a guest-specific alert key
	for key := range m.activeAlerts {
		if key == "backup-age-pve1-node1-100" || key == "backup-age-pve2-node2-100" {
			t.Errorf("should not attribute ambiguous backup to a specific guest, but found key %q", key)
		}
	}

	// Should have a generic PBS alert key
	expectedKey := "backup-age-pbs-pbs-main-qemu-100"
	if _, exists := m.activeAlerts[expectedKey]; !exists {
		var keys []string
		for k := range m.activeAlerts {
			keys = append(keys, k)
		}
		t.Errorf("expected generic PBS alert key %q, found keys: %v", expectedKey, keys)
	}
}

func TestCheckBackupsHandlesPmgBackups(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:      true,
		WarningDays:  5,
		CriticalDays: 7,
	}
	m.mu.Unlock()

	now := time.Now()
	pmgBackups := []models.PMGBackup{
		{
			ID:         "pmg-backup-mail-01",
			Instance:   "mail",
			Node:       "mail-gateway",
			Filename:   "pmg-backup_2024-01-01.tgz",
			BackupTime: now.Add(-8 * 24 * time.Hour),
			Size:       123456,
		},
	}

	m.CheckBackups(nil, nil, pmgBackups, map[string]GuestLookup{}, map[string][]GuestLookup{}, nil)

	m.mu.RLock()
	found := false
	for id, alert := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-age-") {
			found = true
			if alert.Level != AlertLevelCritical {
				t.Fatalf("expected PMG backup alert to be critical")
			}
			break
		}
	}
	m.mu.RUnlock()
	if !found {
		t.Fatalf("expected PMG backup alert to be created")
	}
}

func TestCheckBackupsSkipsOrphanedWhenDisabled(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	alertOrphaned := false
	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:       true,
		WarningDays:   3,
		CriticalDays:  5,
		AlertOrphaned: &alertOrphaned,
		IgnoreVMIDs:   []string{},
	}
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-200-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     200,
			Time:     now.Add(-6 * 24 * time.Hour),
		},
	}

	// Include a live sentinel guest so hasLiveInventory is true and orphan detection runs.
	sentinelKey := BuildGuestKey("inst", "snode", 9999)
	guestsByKey := map[string]GuestLookup{
		sentinelKey: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "inst", Node: "snode", Type: "qemu", VMID: 9999},
	}
	guestsByVMID := map[string][]GuestLookup{
		"9999": {guestsByKey[sentinelKey]},
	}

	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-age-") {
			t.Fatalf("expected orphaned backup to be skipped, found alert %s", id)
		}
		if strings.HasPrefix(id, "backup-orphaned-") {
			t.Fatalf("expected orphaned backup alert to be suppressed when alertOrphaned=false, found alert %s", id)
		}
	}
}

func TestCheckBackupsCreatesOrphanedAlert(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	alertOrphaned := true
	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:       true,
		WarningDays:   7,
		CriticalDays:  14,
		AlertOrphaned: &alertOrphaned,
		IgnoreVMIDs:   []string{},
	}
	m.mu.Unlock()

	now := time.Now()
	// Backup is only 1 day old — well below both age thresholds.
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-200-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     200,
			Time:     now.Add(-1 * 24 * time.Hour),
		},
	}

	// Include a live sentinel guest so hasLiveInventory is true and orphan detection runs.
	// VMID 200 is still orphaned because it's not in the inventory.
	sentinelKey := BuildGuestKey("inst", "snode", 9999)
	guestsByKey := map[string]GuestLookup{
		sentinelKey: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "inst", Node: "snode", Type: "qemu", VMID: 9999},
	}
	guestsByVMID := map[string][]GuestLookup{
		"9999": {guestsByKey[sentinelKey]},
	}

	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()
	found := false
	for id, alert := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			found = true
			if alert.Type != "backup-orphaned" {
				t.Fatalf("expected alert type backup-orphaned, got %s", alert.Type)
			}
			if alert.Level != AlertLevelWarning {
				t.Fatalf("expected alert level warning, got %s", alert.Level)
			}
			if alert.Metadata == nil || alert.Metadata["orphaned"] != true {
				t.Fatalf("expected metadata orphaned=true")
			}
			if alert.Metadata["vmid"] != "200" {
				t.Fatalf("expected metadata vmid=200, got %v", alert.Metadata["vmid"])
			}
		}
	}
	if !found {
		t.Fatalf("expected a backup-orphaned alert to be created for orphaned VMID 200")
	}
	// Also verify no backup-age alert was created (below thresholds).
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-age-") {
			t.Fatalf("expected no backup-age alert for orphan below age threshold, found %s", id)
		}
	}
}

func TestCheckBackupsOrphanedAlertClearsWhenGuestReappears(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	alertOrphaned := true
	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:       true,
		WarningDays:   7,
		CriticalDays:  14,
		AlertOrphaned: &alertOrphaned,
		IgnoreVMIDs:   []string{},
	}
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-300-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     300,
			Time:     now.Add(-1 * 24 * time.Hour),
		},
	}

	// Include a live sentinel guest on the same instance so orphan detection runs.
	sentinelKey := BuildGuestKey("inst", "snode", 9999)
	sentinelByKey := map[string]GuestLookup{
		sentinelKey: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "inst", Node: "snode", Type: "qemu", VMID: 9999},
	}
	sentinelByVMID := map[string][]GuestLookup{
		"9999": {sentinelByKey[sentinelKey]},
	}

	// First cycle: guest 300 absent (only sentinel present) → orphaned alert fires.
	m.CheckBackups(storageBackups, nil, nil, sentinelByKey, sentinelByVMID, nil)

	m.mu.RLock()
	orphanedFound := false
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			orphanedFound = true
		}
	}
	m.mu.RUnlock()
	if !orphanedFound {
		t.Fatalf("expected orphaned alert after first cycle")
	}

	// Second cycle: guest reappears in inventory → orphaned alert should clear.
	guestKey := BuildGuestKey("inst", "node", 300)
	guestsByKey := map[string]GuestLookup{
		guestKey: {ResourceID: "qemu/300", Name: "restored-vm", Instance: "inst", Node: "node", Type: "qemu", VMID: 300},
	}
	guestsByVMID := map[string][]GuestLookup{
		"300": {guestsByKey[guestKey]},
	}
	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			t.Fatalf("expected orphaned alert to be cleared after guest reappears, found %s", id)
		}
	}
}

func TestCheckBackupsOrphanedIgnoresVMIDs(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	alertOrphaned := true
	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:       true,
		WarningDays:   7,
		CriticalDays:  14,
		AlertOrphaned: &alertOrphaned,
		IgnoreVMIDs:   []string{"20*"},
	}
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-200-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     200,
			Time:     now.Add(-1 * 24 * time.Hour),
		},
		{
			ID:       "inst-node-300-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     300,
			Time:     now.Add(-1 * 24 * time.Hour),
		},
	}

	// Include a live sentinel guest so hasLiveInventory is true and orphan detection runs.
	sentinelKey := BuildGuestKey("inst", "snode", 9999)
	guestsByKey := map[string]GuestLookup{
		sentinelKey: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "inst", Node: "snode", Type: "qemu", VMID: 9999},
	}
	guestsByVMID := map[string][]GuestLookup{
		"9999": {guestsByKey[sentinelKey]},
	}

	// Both are orphaned (not in inventory), but VMID 200 matches ignore pattern "20*".
	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") && strings.Contains(id, "200") {
			t.Fatalf("expected orphaned alert for VMID 200 to be suppressed by ignoreVMIDs, found %s", id)
		}
	}
	found300 := false
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") && strings.Contains(id, "300") {
			found300 = true
		}
	}
	if !found300 {
		t.Fatalf("expected orphaned alert for VMID 300 (not in ignore list)")
	}
}

func TestCheckBackupsOrphanedWithZeroAgeThresholds(t *testing.T) {
	m := newTestManager(t)
	m.ClearActiveAlerts()

	alertOrphaned := true
	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:       true,
		WarningDays:   0,
		CriticalDays:  0,
		AlertOrphaned: &alertOrphaned,
		IgnoreVMIDs:   []string{},
	}
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-400-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     400,
			Time:     now.Add(-1 * 24 * time.Hour),
		},
	}

	// Include a live sentinel guest so hasLiveInventory is true and orphan detection runs.
	sentinelKey := BuildGuestKey("inst", "snode", 9999)
	guestsByKey := map[string]GuestLookup{
		sentinelKey: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "inst", Node: "snode", Type: "qemu", VMID: 9999},
	}
	guestsByVMID := map[string][]GuestLookup{
		"9999": {guestsByKey[sentinelKey]},
	}

	// Orphaned guest with zero age thresholds — should still fire orphaned alert.
	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()
	found := false
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			found = true
		}
	}
	if !found {
		t.Fatalf("expected backup-orphaned alert even with zero age thresholds")
	}
	// No backup-age alerts should exist since thresholds are 0.
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-age-") {
			t.Fatalf("expected no backup-age alert with zero thresholds, found %s", id)
		}
	}
}

func TestCheckBackupsOrphanedWithPersistedMetadata(t *testing.T) {
	// When a guest is deleted, enrichWithPersistedMetadata adds an entry to
	// guestsByVMID with an empty ResourceID (just name/type metadata for display).
	// This must NOT suppress orphaned alerts — only live guests (ResourceID != "")
	// indicate the guest is still in inventory.
	m := newTestManager(t)
	m.ClearActiveAlerts()

	alertOrphaned := true
	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:       true,
		WarningDays:   7,
		CriticalDays:  14,
		AlertOrphaned: &alertOrphaned,
		IgnoreVMIDs:   []string{},
	}
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-500-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     500,
			Time:     now.Add(-1 * 24 * time.Hour),
		},
	}

	// Simulate persisted metadata for deleted guest: entry exists in
	// guestsByVMID but with empty ResourceID (no live guest).
	// Include a live sentinel guest on the same instance so orphan detection runs.
	sentinelKey := BuildGuestKey("inst", "snode", 9999)
	guestsByKey := map[string]GuestLookup{
		sentinelKey: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "inst", Node: "snode", Type: "qemu", VMID: 9999},
	}
	guestsByVMID := map[string][]GuestLookup{
		"500":  {{Name: "deleted-vm", Instance: "inst", Node: "node", Type: "qemu", VMID: 500}},
		"9999": {guestsByKey[sentinelKey]},
	}

	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()
	found := false
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			found = true
		}
	}
	if !found {
		t.Fatalf("expected backup-orphaned alert even when guestsByVMID has metadata-only entry (no ResourceID)")
	}
}

func TestCheckBackupsOrphanedSkippedWhenNoLiveInventory(t *testing.T) {
	// When no live guests exist (empty maps or only persisted metadata),
	// orphan detection is skipped entirely to avoid false positives during
	// startup race / auth failure / inventory outage.
	m := newTestManager(t)
	m.ClearActiveAlerts()

	alertOrphaned := true
	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:       true,
		WarningDays:   7,
		CriticalDays:  14,
		AlertOrphaned: &alertOrphaned,
		IgnoreVMIDs:   []string{},
	}
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-600-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     600,
			Time:     now.Add(-1 * 24 * time.Hour),
		},
	}

	// Completely empty guest maps — no live inventory.
	m.CheckBackups(storageBackups, nil, nil, map[string]GuestLookup{}, map[string][]GuestLookup{}, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			t.Fatalf("expected no orphaned alerts when guest inventory is empty (startup race guard), found %s", id)
		}
	}
}

func TestCheckBackupsOrphanedPreservedWhenNoLiveInventory(t *testing.T) {
	// When a legitimate orphan alert already exists and inventory becomes
	// unavailable (auth failure, restart), the alert should be preserved
	// rather than cleared — we can't confirm it's resolved.
	m := newTestManager(t)
	m.ClearActiveAlerts()

	alertOrphaned := true
	m.mu.Lock()
	m.config.Enabled = true
	m.config.BackupDefaults = BackupAlertConfig{
		Enabled:       true,
		WarningDays:   7,
		CriticalDays:  14,
		AlertOrphaned: &alertOrphaned,
		IgnoreVMIDs:   []string{},
	}
	m.mu.Unlock()

	now := time.Now()
	storageBackups := []models.StorageBackup{
		{
			ID:       "inst-node-700-backup",
			Storage:  "local",
			Node:     "node",
			Instance: "inst",
			Type:     "qemu",
			VMID:     700,
			Time:     now.Add(-1 * 24 * time.Hour),
		},
	}

	// First cycle: with live inventory → orphan alert fires.
	sentinelKey := BuildGuestKey("inst", "snode", 9999)
	guestsByKey := map[string]GuestLookup{
		sentinelKey: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "inst", Node: "snode", Type: "qemu", VMID: 9999},
	}
	guestsByVMID := map[string][]GuestLookup{
		"9999": {guestsByKey[sentinelKey]},
	}
	m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)

	m.mu.RLock()
	orphanFound := false
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			orphanFound = true
		}
	}
	m.mu.RUnlock()
	if !orphanFound {
		t.Fatalf("expected orphan alert after first cycle with live inventory")
	}

	// Second cycle: inventory disappears (empty maps) — orphan alert must be preserved.
	m.CheckBackups(storageBackups, nil, nil, map[string]GuestLookup{}, map[string][]GuestLookup{}, nil)

	m.mu.RLock()
	defer m.mu.RUnlock()
	preserved := false
	for id := range m.activeAlerts {
		if strings.HasPrefix(id, "backup-orphaned-") {
			preserved = true
		}
	}
	if !preserved {
		t.Fatalf("expected orphan alert to be preserved when inventory is unavailable, but it was cleared")
	}
}

func TestCheckBackupsOrphanedCrossInstanceVMID(t *testing.T) {
|
|
// Instance A's guest (VMID 600) is deleted, but instance B has a live
|
|
// guest with the same VMID. The storage backup from instance A should
|
|
// still fire an orphaned alert — the live guest on instance B is irrelevant.
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
alertOrphaned := true
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.BackupDefaults = BackupAlertConfig{
|
|
Enabled: true,
|
|
WarningDays: 7,
|
|
CriticalDays: 14,
|
|
AlertOrphaned: &alertOrphaned,
|
|
IgnoreVMIDs: []string{},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
now := time.Now()
|
|
// Storage backup from instance A.
|
|
storageBackups := []models.StorageBackup{
|
|
{
|
|
ID: "instA-nodeA-600-backup",
|
|
Storage: "local",
|
|
Node: "nodeA",
|
|
Instance: "instA",
|
|
Type: "qemu",
|
|
VMID: 600,
|
|
Time: now.Add(-1 * 24 * time.Hour),
|
|
},
|
|
}
|
|
|
|
// Instance B has a live guest with the same VMID.
|
|
// Instance A also has a sentinel guest so its inventory is considered populated.
|
|
keyB := BuildGuestKey("instB", "nodeB", 600)
|
|
sentinelA := BuildGuestKey("instA", "nodeA", 9999)
|
|
guestsByKey := map[string]GuestLookup{
|
|
keyB: {ResourceID: "qemu/600", Name: "vm-instB", Instance: "instB", Node: "nodeB", Type: "qemu", VMID: 600},
|
|
sentinelA: {ResourceID: "qemu/9999", Name: "sentinel-vm", Instance: "instA", Node: "nodeA", Type: "qemu", VMID: 9999},
|
|
}
|
|
guestsByVMID := map[string][]GuestLookup{
|
|
"600": {guestsByKey[keyB]},
|
|
"9999": {guestsByKey[sentinelA]},
|
|
}
|
|
|
|
m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
found := false
|
|
for id := range m.activeAlerts {
|
|
if strings.HasPrefix(id, "backup-orphaned-") {
|
|
found = true
|
|
}
|
|
}
|
|
if !found {
|
|
t.Fatalf("expected backup-orphaned alert for instA even though instB has a live guest with the same VMID")
|
|
}
|
|
}
|
|
|
|
func TestCheckBackupsSkipsPVEOrphanDetectionUntilTemplateInventoryReady(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
alertOrphaned := true
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.BackupDefaults = BackupAlertConfig{
|
|
Enabled: true,
|
|
WarningDays: 7,
|
|
CriticalDays: 14,
|
|
AlertOrphaned: &alertOrphaned,
|
|
IgnoreVMIDs: []string{},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
now := time.Now()
|
|
storageBackups := []models.StorageBackup{
|
|
{
|
|
ID: "instA-node2-700-backup",
|
|
Storage: "local",
|
|
Node: "node2",
|
|
Instance: "instA",
|
|
Type: "qemu",
|
|
VMID: 700,
|
|
Time: now.Add(-1 * 24 * time.Hour),
|
|
},
|
|
}
|
|
|
|
// Simulate the startup/concurrency window where the instance has enough live
|
|
// guest data to satisfy the legacy readiness heuristic, but template inventory
|
|
// has not been populated yet. This backup must not be treated as orphaned.
|
|
sentinelKey := BuildGuestKey("instA", "node3", 9999)
|
|
guestsByKey := map[string]GuestLookup{
|
|
sentinelKey: {
|
|
ResourceID: "qemu/9999",
|
|
Name: "sentinel-vm",
|
|
Instance: "instA",
|
|
Node: "node3",
|
|
Type: "qemu",
|
|
VMID: 9999,
|
|
},
|
|
}
|
|
guestsByVMID := map[string][]GuestLookup{
|
|
"9999": {guestsByKey[sentinelKey]},
|
|
}
|
|
|
|
m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, map[string]bool{})
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
for id := range m.activeAlerts {
|
|
if strings.HasPrefix(id, "backup-orphaned-") {
|
|
t.Fatalf("expected no orphaned alert before template inventory is ready, found %s", id)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestCheckBackupsIgnoresVMIDs(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
alertOrphaned := true
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.BackupDefaults = BackupAlertConfig{
|
|
Enabled: true,
|
|
WarningDays: 1,
|
|
CriticalDays: 2,
|
|
AlertOrphaned: &alertOrphaned,
|
|
IgnoreVMIDs: []string{"10*"},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
now := time.Now()
|
|
storageBackups := []models.StorageBackup{
|
|
{
|
|
ID: "inst-node-101-backup",
|
|
Storage: "local",
|
|
Node: "node",
|
|
Instance: "inst",
|
|
Type: "qemu",
|
|
VMID: 101,
|
|
Time: now.Add(-3 * 24 * time.Hour),
|
|
},
|
|
{
|
|
ID: "inst-node-200-backup",
|
|
Storage: "local",
|
|
Node: "node",
|
|
Instance: "inst",
|
|
Type: "qemu",
|
|
VMID: 200,
|
|
Time: now.Add(-3 * 24 * time.Hour),
|
|
},
|
|
}
|
|
|
|
keyIgnored := BuildGuestKey("inst", "node", 101)
|
|
keyAllowed := BuildGuestKey("inst", "node", 200)
|
|
guestsByKey := map[string]GuestLookup{
|
|
keyIgnored: {ResourceID: "qemu/101", Name: "ignored-vm", Instance: "inst", Node: "node", Type: "qemu", VMID: 101},
|
|
keyAllowed: {ResourceID: "qemu/200", Name: "allowed-vm", Instance: "inst", Node: "node", Type: "qemu", VMID: 200},
|
|
}
|
|
guestsByVMID := map[string][]GuestLookup{
|
|
"101": {guestsByKey[keyIgnored]},
|
|
"200": {guestsByKey[keyAllowed]},
|
|
}
|
|
|
|
m.CheckBackups(storageBackups, nil, nil, guestsByKey, guestsByVMID, nil)
|
|
|
|
m.mu.RLock()
|
|
_, ignoredExists := m.activeAlerts["backup-age-"+sanitizeAlertKey(keyIgnored)]
|
|
_, allowedExists := m.activeAlerts["backup-age-"+sanitizeAlertKey(keyAllowed)]
|
|
m.mu.RUnlock()
|
|
|
|
if ignoredExists {
|
|
t.Fatalf("expected backup alert for ignored VMID to be suppressed")
|
|
}
|
|
if !allowedExists {
|
|
t.Fatalf("expected backup alert for non-ignored VMID")
|
|
}
|
|
}
|
|
|
|
func TestCheckDockerHostIgnoresContainersByPrefix(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.config.DockerIgnoredContainerPrefixes = []string{"runner-"}
|
|
m.mu.Unlock()
|
|
|
|
container := models.DockerContainer{
|
|
ID: "1234567890ab",
|
|
Name: "runner-auto-1",
|
|
State: "exited",
|
|
Status: "Exited (0) 3 seconds ago",
|
|
}
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-ephemeral",
|
|
Hostname: "ci-host",
|
|
DisplayName: "CI Host",
|
|
Containers: []models.DockerContainer{container},
|
|
}
|
|
|
|
resourceID := dockerResourceID(host.ID, container.ID)
|
|
alertID := fmt.Sprintf("docker-container-state-%s", resourceID)
|
|
|
|
// Run twice to satisfy the confirmation threshold when not ignored
|
|
m.CheckDockerHost(host)
|
|
m.CheckDockerHost(host)
|
|
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatalf("expected no state alert for ignored container")
|
|
}
|
|
if _, exists := m.dockerStateConfirm[resourceID]; exists {
|
|
t.Fatalf("expected no state confirmation tracking for ignored container")
|
|
}
|
|
}
|
|
|
|
func TestDockerServiceReplicaAlerts(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
m.mu.RLock()
|
|
cfg := m.config
|
|
m.mu.RUnlock()
|
|
cfg.Enabled = true
|
|
m.UpdateConfig(cfg)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-1",
|
|
DisplayName: "Prod Swarm",
|
|
Hostname: "swarm-prod",
|
|
Services: []models.DockerService{
|
|
{
|
|
ID: "svc-1",
|
|
Name: "web",
|
|
DesiredTasks: 4,
|
|
RunningTasks: 2,
|
|
Mode: "replicated",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerServiceResourceID(host.ID, "svc-1", "web")
|
|
alertID := fmt.Sprintf("docker-service-health-%s", resourceID)
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatalf("expected service alert %s to be raised", alertID)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Fatalf("expected critical severity, got %s", alert.Level)
|
|
}
|
|
if missing, ok := alert.Metadata["missingTasks"].(int); !ok || missing != 2 {
|
|
t.Fatalf("expected missingTasks metadata to be 2, got %v", alert.Metadata["missingTasks"])
|
|
}
|
|
|
|
// Resolve by restoring replicas
|
|
host.Services[0].RunningTasks = 4
|
|
m.CheckDockerHost(host)
|
|
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatalf("expected service alert %s to be cleared when replicas restored", alertID)
|
|
}
|
|
}
|
|
|
|
func TestDockerServiceAlertDoesNotRenotifyWhenUnchanged(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
cfg := m.GetConfig()
|
|
cfg.Enabled = true
|
|
cfg.ActivationState = ActivationActive
|
|
cfg.Schedule.MaxAlertsHour = 100
|
|
m.UpdateConfig(cfg)
|
|
|
|
dispatched := make(chan string, 4)
|
|
m.SetAlertCallback(func(alert *Alert) {
|
|
dispatched <- alert.ID
|
|
})
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-1",
|
|
DisplayName: "Prod Swarm",
|
|
Hostname: "swarm-prod",
|
|
Services: []models.DockerService{
|
|
{
|
|
ID: "svc-1",
|
|
Name: "web",
|
|
DesiredTasks: 4,
|
|
RunningTasks: 2,
|
|
Mode: "replicated",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
select {
|
|
case <-dispatched:
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("expected initial docker service alert notification")
|
|
}
|
|
|
|
// Same degraded state should update LastSeen/value but not re-notify every poll.
|
|
m.CheckDockerHost(host)
|
|
|
|
select {
|
|
case id := <-dispatched:
|
|
t.Fatalf("expected no second notification for unchanged service alert, got %s", id)
|
|
case <-time.After(250 * time.Millisecond):
|
|
}
|
|
}
|
|
|
|
func TestDockerServiceAlertPreservesLastNotifiedWhenUnchanged(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
cfg := m.GetConfig()
|
|
cfg.Enabled = true
|
|
cfg.ActivationState = ActivationActive
|
|
cfg.Schedule.MaxAlertsHour = 100
|
|
m.UpdateConfig(cfg)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-1",
|
|
DisplayName: "Prod Swarm",
|
|
Hostname: "swarm-prod",
|
|
Services: []models.DockerService{
|
|
{
|
|
ID: "svc-1",
|
|
Name: "web",
|
|
DesiredTasks: 4,
|
|
RunningTasks: 2,
|
|
Mode: "replicated",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerServiceResourceID(host.ID, "svc-1", "web")
|
|
alertID := fmt.Sprintf("docker-service-health-%s", resourceID)
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatalf("expected service alert %s to be raised", alertID)
|
|
}
|
|
|
|
notifiedAt := time.Now().Add(-2 * time.Minute).UTC()
|
|
alert.LastNotified = ¬ifiedAt
|
|
|
|
// Same degraded state should keep LastNotified while refreshing state.
|
|
m.CheckDockerHost(host)
|
|
|
|
updated, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatalf("expected service alert %s to remain active", alertID)
|
|
}
|
|
if updated.LastNotified == nil {
|
|
t.Fatal("expected LastNotified to be preserved, got nil")
|
|
}
|
|
if !updated.LastNotified.Equal(notifiedAt) {
|
|
t.Fatalf("expected LastNotified %s, got %s", notifiedAt, updated.LastNotified)
|
|
}
|
|
}
|
|
|
|
func TestDockerServiceAlertRenotifiesOnEscalationToCritical(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
cfg := m.GetConfig()
|
|
cfg.Enabled = true
|
|
cfg.ActivationState = ActivationActive
|
|
cfg.Schedule.MaxAlertsHour = 100
|
|
cfg.DockerDefaults.ServiceWarnGapPct = 10
|
|
cfg.DockerDefaults.ServiceCritGapPct = 50
|
|
m.UpdateConfig(cfg)
|
|
|
|
dispatched := make(chan AlertLevel, 4)
|
|
m.SetAlertCallback(func(alert *Alert) {
|
|
dispatched <- alert.Level
|
|
})
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-1",
|
|
DisplayName: "Prod Swarm",
|
|
Hostname: "swarm-prod",
|
|
Services: []models.DockerService{
|
|
{
|
|
ID: "svc-1",
|
|
Name: "web",
|
|
DesiredTasks: 4,
|
|
RunningTasks: 3, // 25% missing -> warning
|
|
Mode: "replicated",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
select {
|
|
case level := <-dispatched:
|
|
if level != AlertLevelWarning {
|
|
t.Fatalf("expected warning notification first, got %s", level)
|
|
}
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("expected initial warning notification")
|
|
}
|
|
|
|
// Escalate from warning to critical: should notify again.
|
|
host.Services[0].RunningTasks = 1 // 75% missing -> critical
|
|
m.CheckDockerHost(host)
|
|
|
|
select {
|
|
case level := <-dispatched:
|
|
if level != AlertLevelCritical {
|
|
t.Fatalf("expected critical escalation notification, got %s", level)
|
|
}
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("expected escalation notification")
|
|
}
|
|
}
|
|
|
|
func TestDockerServiceUpdateStateAlert(t *testing.T) {
|
|
m := newTestManager(t)
|
|
cfg := m.GetConfig()
|
|
cfg.Enabled = true
|
|
m.UpdateConfig(cfg)
|
|
|
|
now := time.Now()
|
|
host := models.DockerHost{
|
|
ID: "host-update",
|
|
DisplayName: "Swarm",
|
|
Hostname: "swarm.local",
|
|
Services: []models.DockerService{
|
|
{
|
|
ID: "svc-update",
|
|
Name: "api",
|
|
DesiredTasks: 1,
|
|
RunningTasks: 1,
|
|
UpdateStatus: &models.DockerServiceUpdate{
|
|
State: "rollback_failed",
|
|
Message: "Rollback failed",
|
|
CompletedAt: &now,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerServiceResourceID(host.ID, "svc-update", "api")
|
|
alertID := fmt.Sprintf("docker-service-health-%s", resourceID)
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatalf("expected docker service alert %s to be raised", alertID)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Fatalf("expected critical severity for rollback failure, got %s", alert.Level)
|
|
}
|
|
if state, ok := alert.Metadata["updateState"].(string); !ok || state != "rollback_failed" {
|
|
t.Fatalf("expected updateState metadata to be rollback_failed, got %v", alert.Metadata["updateState"])
|
|
}
|
|
}
|
|
|
|
func TestDockerContainerStateUsesDockerDefaults(t *testing.T) {
|
|
m := newTestManager(t)
|
|
cfg := m.GetConfig()
|
|
cfg.DockerDefaults.StatePoweredOffSeverity = AlertLevelCritical
|
|
m.UpdateConfig(cfg)
|
|
|
|
container := models.DockerContainer{
|
|
ID: "container-1",
|
|
Name: "web",
|
|
State: "exited",
|
|
Status: "Exited (1) seconds ago",
|
|
}
|
|
host := models.DockerHost{
|
|
ID: "host-1",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{container},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, container.ID)
|
|
alertID := fmt.Sprintf("docker-container-state-%s", resourceID)
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatalf("expected docker container state alert %s to be raised", alertID)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Fatalf("expected critical severity from docker defaults, got %s", alert.Level)
|
|
}
|
|
}
|
|
|
|
func TestDockerContainerStateRespectsDisableDefault(t *testing.T) {
|
|
m := newTestManager(t)
|
|
cfg := m.GetConfig()
|
|
cfg.DockerDefaults.StateDisableConnectivity = true
|
|
m.UpdateConfig(cfg)
|
|
|
|
container := models.DockerContainer{
|
|
ID: "container-2",
|
|
Name: "batch",
|
|
State: "exited",
|
|
Status: "Exited (0) seconds ago",
|
|
}
|
|
host := models.DockerHost{
|
|
ID: "host-2",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.example",
|
|
Containers: []models.DockerContainer{container},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, container.ID)
|
|
alertID := fmt.Sprintf("docker-container-state-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatalf("did not expect docker container state alert when defaults disable connectivity")
|
|
}
|
|
}
|
|
|
|
func TestDockerContainerMemoryLimitHysteresis(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
hostID := "host-mem"
|
|
containerID := "container-mem"
|
|
hostHigh := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.mem",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "memory-hog",
|
|
State: "running",
|
|
Status: "Up 10 minutes",
|
|
MemoryUsage: 96 * 1024 * 1024,
|
|
MemoryLimit: 100 * 1024 * 1024,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(hostHigh)
|
|
|
|
resourceID := dockerResourceID(hostID, containerID)
|
|
alertID := fmt.Sprintf("docker-container-memory-limit-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; !exists {
|
|
t.Fatalf("expected memory limit alert to be raised")
|
|
}
|
|
|
|
hostLow := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.mem",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "memory-hog",
|
|
State: "running",
|
|
Status: "Up 12 minutes",
|
|
MemoryUsage: 80 * 1024 * 1024,
|
|
MemoryLimit: 100 * 1024 * 1024,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(hostLow)
|
|
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatalf("expected memory limit alert to clear after usage dropped below hysteresis threshold")
|
|
}
|
|
}
|
|
|
|
func TestDockerContainerDiskUsageAlert(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
cfg := m.GetConfig()
|
|
cfg.Enabled = true
|
|
cfg.TimeThreshold = 0
|
|
if cfg.TimeThresholds == nil {
|
|
cfg.TimeThresholds = make(map[string]int)
|
|
}
|
|
cfg.TimeThresholds["docker"] = 0
|
|
cfg.TimeThresholds["guest"] = 0
|
|
cfg.DockerDefaults.Disk = HysteresisThreshold{Trigger: 75, Clear: 65}
|
|
m.UpdateConfig(cfg)
|
|
|
|
const gib = 1024 * 1024 * 1024
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-disk",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.disk",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-disk",
|
|
Name: "disk-hog",
|
|
State: "running",
|
|
Status: "Up 5 minutes",
|
|
WritableLayerBytes: int64(8 * gib),
|
|
RootFilesystemBytes: int64(10 * gib),
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("%s-%s", resourceID, "disk")
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatalf("expected docker container disk alert %s to be raised", alertID)
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Fatalf("expected warning severity for disk usage alert, got %s", alert.Level)
|
|
}
|
|
if alert.Metadata == nil {
|
|
t.Fatalf("expected disk alert metadata to be populated")
|
|
}
|
|
if percent, ok := alert.Metadata["diskPercent"].(float64); !ok || percent < 79.5 || percent > 80.5 {
|
|
t.Fatalf("expected diskPercent metadata to be ~80%%, got %v", alert.Metadata["diskPercent"])
|
|
}
|
|
if used, ok := alert.Metadata["writableLayerBytes"].(int64); !ok || used != int64(8*gib) {
|
|
t.Fatalf("expected writableLayerBytes metadata to be %d, got %v", int64(8*gib), alert.Metadata["writableLayerBytes"])
|
|
}
|
|
|
|
// Drop usage below the clear threshold and ensure the alert resolves.
|
|
host.Containers[0].WritableLayerBytes = int64(4 * gib)
|
|
m.CheckDockerHost(host)
|
|
|
|
if _, stillActive := m.activeAlerts[alertID]; stillActive {
|
|
t.Fatalf("expected docker container disk alert %s to clear after usage dropped", alertID)
|
|
}
|
|
}
|
|
|
|
func TestUpdateConfigClampsDockerServiceCriticalGap(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
GuestDefaults: ThresholdConfig{},
|
|
NodeDefaults: ThresholdConfig{},
|
|
HostDefaults: ThresholdConfig{},
|
|
StorageDefault: HysteresisThreshold{},
|
|
DockerDefaults: DockerThresholdConfig{
|
|
ServiceWarnGapPct: 35,
|
|
ServiceCritGapPct: 20,
|
|
},
|
|
PMGDefaults: PMGThresholdConfig{},
|
|
SnapshotDefaults: SnapshotAlertConfig{},
|
|
BackupDefaults: BackupAlertConfig{},
|
|
Overrides: make(map[string]ThresholdConfig),
|
|
Schedule: ScheduleConfig{},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.DockerDefaults.ServiceWarnGapPct != 35 {
|
|
t.Fatalf("expected warning gap to remain 35, got %d", m.config.DockerDefaults.ServiceWarnGapPct)
|
|
}
|
|
if m.config.DockerDefaults.ServiceCritGapPct != 35 {
|
|
t.Fatalf("expected critical gap to be clamped to 35, got %d", m.config.DockerDefaults.ServiceCritGapPct)
|
|
}
|
|
}
|
|
|
|
func TestDockerServiceAlertUsesClampedCriticalGap(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
GuestDefaults: ThresholdConfig{},
|
|
NodeDefaults: ThresholdConfig{},
|
|
HostDefaults: ThresholdConfig{},
|
|
StorageDefault: HysteresisThreshold{},
|
|
DockerDefaults: DockerThresholdConfig{
|
|
ServiceWarnGapPct: 20,
|
|
ServiceCritGapPct: 5,
|
|
},
|
|
PMGDefaults: PMGThresholdConfig{},
|
|
SnapshotDefaults: SnapshotAlertConfig{},
|
|
BackupDefaults: BackupAlertConfig{},
|
|
Overrides: make(map[string]ThresholdConfig),
|
|
Schedule: ScheduleConfig{},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
host := models.DockerHost{
|
|
ID: "docker-host-1",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker-host.local",
|
|
Services: []models.DockerService{
|
|
{
|
|
ID: "svc-123",
|
|
Name: "api",
|
|
DesiredTasks: 10,
|
|
RunningTasks: 7,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerServiceResourceID(host.ID, "svc-123", "api")
|
|
alertID := fmt.Sprintf("docker-service-health-%s", resourceID)
|
|
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatalf("expected docker service alert %s to be raised", alertID)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Fatalf("expected critical severity when replicas 7/10, got %s", alert.Level)
|
|
}
|
|
if pct, ok := alert.Metadata["percentMissing"].(float64); !ok || math.Abs(pct-30.0) > 0.01 {
|
|
t.Fatalf("expected percentMissing metadata ~30, got %v", alert.Metadata["percentMissing"])
|
|
}
|
|
}
|
|
|
|
// TestNormalizeHostDefaultsPreservesZeroTrigger verifies that setting
|
|
// Host Agent thresholds to 0 is preserved (fixes GitHub issue #864).
|
|
// Setting a threshold to 0 should disable alerting for that metric.
|
|
func TestNormalizeHostDefaultsPreservesZeroTrigger(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("nil HostDefaults get factory defaults", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
HostDefaults: ThresholdConfig{}, // Empty - needs defaults
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.HostDefaults.CPU == nil {
|
|
t.Fatal("CPU defaults should be set")
|
|
}
|
|
if m.config.HostDefaults.CPU.Trigger != 80 {
|
|
t.Errorf("CPU trigger = %v, want 80", m.config.HostDefaults.CPU.Trigger)
|
|
}
|
|
if m.config.HostDefaults.Memory == nil {
|
|
t.Fatal("Memory defaults should be set")
|
|
}
|
|
if m.config.HostDefaults.Memory.Trigger != 85 {
|
|
t.Errorf("Memory trigger = %v, want 85", m.config.HostDefaults.Memory.Trigger)
|
|
}
|
|
if m.config.HostDefaults.Disk == nil {
|
|
t.Fatal("Disk defaults should be set")
|
|
}
|
|
if m.config.HostDefaults.Disk.Trigger != 90 {
|
|
t.Errorf("Disk trigger = %v, want 90", m.config.HostDefaults.Disk.Trigger)
|
|
}
|
|
})
|
|
|
|
t.Run("Trigger=0 preserved to disable alerting", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Set Memory to 0 to disable memory alerting for host agents
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
HostDefaults: ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
|
|
Memory: &HysteresisThreshold{Trigger: 0, Clear: 0}, // Disabled
|
|
Disk: &HysteresisThreshold{Trigger: 90, Clear: 85},
|
|
},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
// Memory threshold should remain at 0 (disabled), not reset to default
|
|
if m.config.HostDefaults.Memory == nil {
|
|
t.Fatal("Memory defaults should be preserved (not nil)")
|
|
}
|
|
if m.config.HostDefaults.Memory.Trigger != 0 {
|
|
t.Errorf("Memory trigger = %v, want 0 (disabled)", m.config.HostDefaults.Memory.Trigger)
|
|
}
|
|
if m.config.HostDefaults.Memory.Clear != 0 {
|
|
t.Errorf("Memory clear = %v, want 0 (disabled)", m.config.HostDefaults.Memory.Clear)
|
|
}
|
|
|
|
// CPU and Disk should still have their values
|
|
if m.config.HostDefaults.CPU.Trigger != 80 {
|
|
t.Errorf("CPU trigger = %v, want 80", m.config.HostDefaults.CPU.Trigger)
|
|
}
|
|
if m.config.HostDefaults.Disk.Trigger != 90 {
|
|
t.Errorf("Disk trigger = %v, want 90", m.config.HostDefaults.Disk.Trigger)
|
|
}
|
|
})
|
|
|
|
t.Run("Trigger=0 sets Clear=0 automatically", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Set CPU to 0 with a non-zero Clear - Clear should be normalized to 0
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
HostDefaults: ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 0, Clear: 50}, // Clear should become 0
|
|
Memory: &HysteresisThreshold{Trigger: 85, Clear: 80},
|
|
Disk: &HysteresisThreshold{Trigger: 0, Clear: 75}, // Clear should become 0
|
|
},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.HostDefaults.CPU.Clear != 0 {
|
|
t.Errorf("CPU clear = %v, want 0 when trigger is 0", m.config.HostDefaults.CPU.Clear)
|
|
}
|
|
if m.config.HostDefaults.Disk.Clear != 0 {
|
|
t.Errorf("Disk clear = %v, want 0 when trigger is 0", m.config.HostDefaults.Disk.Clear)
|
|
}
|
|
})
|
|
|
|
t.Run("missing Clear computed from Trigger", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
HostDefaults: ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 90, Clear: 0}, // Clear should be computed
|
|
Memory: &HysteresisThreshold{Trigger: 95, Clear: 0}, // Clear should be computed
|
|
Disk: &HysteresisThreshold{Trigger: 92, Clear: 0}, // Clear should be computed
|
|
},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
// Clear should be Trigger - 5
|
|
if m.config.HostDefaults.CPU.Clear != 85 {
|
|
t.Errorf("CPU clear = %v, want 85", m.config.HostDefaults.CPU.Clear)
|
|
}
|
|
if m.config.HostDefaults.Memory.Clear != 90 {
|
|
t.Errorf("Memory clear = %v, want 90", m.config.HostDefaults.Memory.Clear)
|
|
}
|
|
if m.config.HostDefaults.Disk.Clear != 87 {
|
|
t.Errorf("Disk clear = %v, want 87", m.config.HostDefaults.Disk.Clear)
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestNormalizeStorageDefaultsPreservesZeroTrigger verifies that setting
|
|
// StorageDefault threshold to 0 is preserved to disable storage alerting.
|
|
func TestNormalizeStorageDefaultsPreservesZeroTrigger(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("negative trigger gets factory defaults", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
StorageDefault: HysteresisThreshold{Trigger: -1, Clear: 0},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.StorageDefault.Trigger != 85 {
|
|
t.Errorf("StorageDefault trigger = %v, want 85", m.config.StorageDefault.Trigger)
|
|
}
|
|
if m.config.StorageDefault.Clear != 80 {
|
|
t.Errorf("StorageDefault clear = %v, want 80", m.config.StorageDefault.Clear)
|
|
}
|
|
})
|
|
|
|
t.Run("Trigger=0 preserved to disable storage alerting", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
StorageDefault: HysteresisThreshold{Trigger: 0, Clear: 0},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.StorageDefault.Trigger != 0 {
|
|
t.Errorf("StorageDefault trigger = %v, want 0 (disabled)", m.config.StorageDefault.Trigger)
|
|
}
|
|
if m.config.StorageDefault.Clear != 0 {
|
|
t.Errorf("StorageDefault clear = %v, want 0 (disabled)", m.config.StorageDefault.Clear)
|
|
}
|
|
})
|
|
|
|
t.Run("missing Clear computed from Trigger", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
StorageDefault: HysteresisThreshold{Trigger: 90, Clear: 0},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.StorageDefault.Trigger != 90 {
|
|
t.Errorf("StorageDefault trigger = %v, want 90", m.config.StorageDefault.Trigger)
|
|
}
|
|
if m.config.StorageDefault.Clear != 85 {
|
|
t.Errorf("StorageDefault clear = %v, want 85 (trigger - 5)", m.config.StorageDefault.Clear)
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestNormalizeNodeDefaultsTemperaturePreservesZeroTrigger verifies that setting
|
|
// NodeDefaults.Temperature threshold to 0 is preserved to disable temperature alerting.
|
|
func TestNormalizeNodeDefaultsTemperaturePreservesZeroTrigger(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("nil Temperature gets factory defaults", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
NodeDefaults: ThresholdConfig{}, // Empty - Temperature needs defaults
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.NodeDefaults.Temperature == nil {
|
|
t.Fatal("Temperature defaults should be set")
|
|
}
|
|
if m.config.NodeDefaults.Temperature.Trigger != 80 {
|
|
t.Errorf("Temperature trigger = %v, want 80", m.config.NodeDefaults.Temperature.Trigger)
|
|
}
|
|
if m.config.NodeDefaults.Temperature.Clear != 75 {
|
|
t.Errorf("Temperature clear = %v, want 75", m.config.NodeDefaults.Temperature.Clear)
|
|
}
|
|
})
|
|
|
|
t.Run("Trigger=0 preserved to disable temperature alerting", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
NodeDefaults: ThresholdConfig{
|
|
Temperature: &HysteresisThreshold{Trigger: 0, Clear: 0},
|
|
},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.NodeDefaults.Temperature == nil {
|
|
t.Fatal("Temperature should be preserved (not nil)")
|
|
}
|
|
if m.config.NodeDefaults.Temperature.Trigger != 0 {
|
|
t.Errorf("Temperature trigger = %v, want 0 (disabled)", m.config.NodeDefaults.Temperature.Trigger)
|
|
}
|
|
if m.config.NodeDefaults.Temperature.Clear != 0 {
|
|
t.Errorf("Temperature clear = %v, want 0 (disabled)", m.config.NodeDefaults.Temperature.Clear)
|
|
}
|
|
})
|
|
|
|
t.Run("missing Clear computed from Trigger", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
NodeDefaults: ThresholdConfig{
|
|
Temperature: &HysteresisThreshold{Trigger: 85, Clear: 0},
|
|
},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.NodeDefaults.Temperature.Trigger != 85 {
|
|
t.Errorf("Temperature trigger = %v, want 85", m.config.NodeDefaults.Temperature.Trigger)
|
|
}
|
|
if m.config.NodeDefaults.Temperature.Clear != 80 {
|
|
t.Errorf("Temperature clear = %v, want 80 (trigger - 5)", m.config.NodeDefaults.Temperature.Clear)
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestNormalizeDockerThresholdPreservesZeroTrigger verifies that Docker
|
|
// container thresholds can be set to 0 to disable alerting.
|
|
func TestNormalizeDockerThresholdPreservesZeroTrigger(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("Trigger=0 disables Docker CPU alerting", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
DockerDefaults: DockerThresholdConfig{
|
|
CPU: HysteresisThreshold{Trigger: 0, Clear: 0},
|
|
Memory: HysteresisThreshold{Trigger: 85, Clear: 80},
|
|
Disk: HysteresisThreshold{Trigger: 85, Clear: 80},
|
|
},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.DockerDefaults.CPU.Trigger != 0 {
|
|
t.Errorf("Docker CPU trigger = %v, want 0 (disabled)", m.config.DockerDefaults.CPU.Trigger)
|
|
}
|
|
if m.config.DockerDefaults.Memory.Trigger != 85 {
|
|
t.Errorf("Docker Memory trigger = %v, want 85", m.config.DockerDefaults.Memory.Trigger)
|
|
}
|
|
})
|
|
|
|
t.Run("negative trigger replaced with defaults", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
cfg := AlertConfig{
|
|
Enabled: true,
|
|
DockerDefaults: DockerThresholdConfig{
|
|
CPU: HysteresisThreshold{Trigger: -5, Clear: 0},
|
|
Memory: HysteresisThreshold{Trigger: -10, Clear: 0},
|
|
Disk: HysteresisThreshold{Trigger: -1, Clear: 0},
|
|
},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.DockerDefaults.CPU.Trigger != 80 {
|
|
t.Errorf("Docker CPU trigger = %v, want 80 (default)", m.config.DockerDefaults.CPU.Trigger)
|
|
}
|
|
if m.config.DockerDefaults.Memory.Trigger != 85 {
|
|
t.Errorf("Docker Memory trigger = %v, want 85 (default)", m.config.DockerDefaults.Memory.Trigger)
|
|
}
|
|
if m.config.DockerDefaults.Disk.Trigger != 85 {
|
|
t.Errorf("Docker Disk trigger = %v, want 85 (default)", m.config.DockerDefaults.Disk.Trigger)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestNormalizeDockerIgnoredPrefixes(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
input []string
|
|
expected []string
|
|
}{
|
|
{
|
|
name: "nil input",
|
|
input: nil,
|
|
expected: nil,
|
|
},
|
|
{
|
|
name: "blank entries removed",
|
|
input: []string{"", " ", "\t"},
|
|
expected: nil,
|
|
},
|
|
{
|
|
name: "trims and deduplicates preserving first occurrence casing",
|
|
input: []string{" Foo ", "foo", "Bar", " bar ", "Baz"},
|
|
expected: []string{"Foo", "Bar", "Baz"},
|
|
},
|
|
{
|
|
name: "already normalized list remains unchanged",
|
|
input: []string{"alpha", "beta"},
|
|
expected: []string{"alpha", "beta"},
|
|
},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
got := NormalizeDockerIgnoredPrefixes(tc.input)
|
|
if !reflect.DeepEqual(got, tc.expected) {
|
|
t.Fatalf("expected %v, got %v", tc.expected, got)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCheckDockerHostIgnoredPrefixClearsExistingAlerts(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
container := models.DockerContainer{
|
|
ID: "abc123456789",
|
|
Name: "runner-job-1",
|
|
State: "exited",
|
|
Status: "Exited (1) 10 seconds ago",
|
|
}
|
|
host := models.DockerHost{
|
|
ID: "docker-host",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker-host.local",
|
|
Containers: []models.DockerContainer{container},
|
|
}
|
|
resourceID := dockerResourceID(host.ID, container.ID)
|
|
stateAlertID := fmt.Sprintf("docker-container-state-%s", resourceID)
|
|
healthAlertID := fmt.Sprintf("docker-container-health-%s", resourceID)
|
|
restartAlertID := fmt.Sprintf("docker-container-restart-loop-%s", resourceID)
|
|
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.DockerIgnoredContainerPrefixes = []string{"runner-"}
|
|
m.activeAlerts[stateAlertID] = &Alert{ID: stateAlertID, ResourceID: resourceID}
|
|
m.activeAlerts[healthAlertID] = &Alert{ID: healthAlertID, ResourceID: resourceID}
|
|
m.activeAlerts[restartAlertID] = &Alert{ID: restartAlertID, ResourceID: resourceID}
|
|
m.dockerStateConfirm[resourceID] = 2
|
|
m.dockerRestartTracking[resourceID] = &dockerRestartRecord{}
|
|
m.dockerLastExitCode[resourceID] = 137
|
|
m.mu.Unlock()
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if _, exists := m.activeAlerts[stateAlertID]; exists {
|
|
t.Fatalf("expected state alert cleared for ignored container")
|
|
}
|
|
if _, exists := m.activeAlerts[healthAlertID]; exists {
|
|
t.Fatalf("expected health alert cleared for ignored container")
|
|
}
|
|
if _, exists := m.activeAlerts[restartAlertID]; exists {
|
|
t.Fatalf("expected restart alert cleared for ignored container")
|
|
}
|
|
if _, exists := m.dockerStateConfirm[resourceID]; exists {
|
|
t.Fatalf("expected state confirmation tracking cleared")
|
|
}
|
|
if _, exists := m.dockerRestartTracking[resourceID]; exists {
|
|
t.Fatalf("expected restart tracking cleared")
|
|
}
|
|
if _, exists := m.dockerLastExitCode[resourceID]; exists {
|
|
t.Fatalf("expected last exit code cleared")
|
|
}
|
|
}
|
|
|
|
func TestUpdateConfigNormalizesDockerIgnoredPrefixes(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("nil input remains nil", func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
m.UpdateConfig(AlertConfig{})
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.DockerIgnoredContainerPrefixes != nil {
|
|
t.Fatalf("expected nil prefixes, got %v", m.config.DockerIgnoredContainerPrefixes)
|
|
}
|
|
})
|
|
|
|
t.Run("duplicates trimmed and deduplicated", func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
cfg := AlertConfig{
|
|
DockerIgnoredContainerPrefixes: []string{
|
|
" Foo ",
|
|
"foo",
|
|
"Bar",
|
|
},
|
|
}
|
|
|
|
m.UpdateConfig(cfg)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
expected := []string{"Foo", "Bar"}
|
|
if !reflect.DeepEqual(m.config.DockerIgnoredContainerPrefixes, expected) {
|
|
t.Fatalf("expected normalized prefixes %v, got %v", expected, m.config.DockerIgnoredContainerPrefixes)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestMatchesDockerIgnoredPrefix(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
containerName string
|
|
containerID string
|
|
prefixes []string
|
|
want bool
|
|
}{
|
|
{name: "empty prefixes", containerName: "runner-123", containerID: "abc", prefixes: nil, want: false},
|
|
{name: "match with name", containerName: "runner-123", containerID: "abc", prefixes: []string{"runner-"}, want: true},
|
|
{name: "match with id", containerName: "app", containerID: "abc123", prefixes: []string{"abc"}, want: true},
|
|
{name: "trimmed comparison", containerName: "runner-job", containerID: "abc", prefixes: []string{" runner- "}, want: true},
|
|
{name: "case insensitive", containerName: "Runner-Job", containerID: "abc", prefixes: []string{"runner-"}, want: true},
|
|
{name: "no match", containerName: "service", containerID: "xyz", prefixes: []string{"runner-"}, want: false},
|
|
{name: "skips empty prefix in list", containerName: "runner-job", containerID: "abc", prefixes: []string{"", "runner-"}, want: true},
|
|
{name: "all empty prefixes returns false", containerName: "runner-job", containerID: "abc", prefixes: []string{"", " ", ""}, want: false},
|
|
{name: "empty name matches id", containerName: "", containerID: "runner-123", prefixes: []string{"runner-"}, want: true},
|
|
{name: "empty id matches name", containerName: "runner-job", containerID: "", prefixes: []string{"runner-"}, want: true},
|
|
{name: "both empty no match", containerName: "", containerID: "", prefixes: []string{"runner-"}, want: false},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
if got := matchesDockerIgnoredPrefix(tc.containerName, tc.containerID, tc.prefixes); got != tc.want {
|
|
t.Fatalf("matchesDockerIgnoredPrefix(%q, %q, %v) = %v, want %v", tc.containerName, tc.containerID, tc.prefixes, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestDockerInstanceName(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
host models.DockerHost
|
|
want string
|
|
}{
|
|
{name: "uses display name", host: models.DockerHost{DisplayName: "Prod Host"}, want: "Docker:Prod Host"},
|
|
{name: "falls back to hostname", host: models.DockerHost{Hostname: "docker.local"}, want: "Docker:docker.local"},
|
|
{name: "defaults when empty", host: models.DockerHost{}, want: "Docker"},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
if got := dockerInstanceName(tc.host); got != tc.want {
|
|
t.Fatalf("dockerInstanceName(%+v) = %q, want %q", tc.host, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestDockerContainerDisplayName(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
container models.DockerContainer
|
|
want string
|
|
}{
|
|
{name: "trims whitespace", container: models.DockerContainer{Name: " app "}, want: "app"},
|
|
{name: "strips leading slash", container: models.DockerContainer{Name: "/runner"}, want: "runner"},
|
|
{name: "falls back to id truncated", container: models.DockerContainer{ID: "0123456789abcdef"}, want: "0123456789ab"},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
if got := dockerContainerDisplayName(tc.container); got != tc.want {
|
|
t.Fatalf("dockerContainerDisplayName(%+v) = %q, want %q", tc.container, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestDockerResourceID(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
hostID string
|
|
containerID string
|
|
want string
|
|
}{
|
|
{name: "both ids present", hostID: "host1", containerID: "abc", want: "docker:host1/abc"},
|
|
{name: "missing host id", hostID: "", containerID: "abc", want: "docker:container/abc"},
|
|
{name: "missing container id", hostID: "host1", containerID: "", want: "docker:host1"},
|
|
{name: "both missing", hostID: "", containerID: "", want: "docker:unknown"},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
if got := dockerResourceID(tc.hostID, tc.containerID); got != tc.want {
|
|
t.Fatalf("dockerResourceID(%q, %q) = %q, want %q", tc.hostID, tc.containerID, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestHasKnownFirmwareBug(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
model string
|
|
want bool
|
|
}{
|
|
{name: "Samsung 980 with SSD prefix", model: "Samsung SSD 980 1TB", want: true},
|
|
{name: "Samsung 980 without SSD prefix", model: "Samsung 980 PRO 2TB", want: true},
|
|
{name: "Samsung 990 with SSD prefix", model: "Samsung SSD 990 PRO 2TB", want: true},
|
|
{name: "Samsung 990 without SSD prefix", model: "Samsung 990 EVO 1TB", want: true},
|
|
{name: "Samsung 980 lowercase", model: "samsung ssd 980 1tb", want: true},
|
|
{name: "Samsung 990 mixed case", model: "SAMSUNG 990 PRO", want: true},
|
|
{name: "Samsung 970 (not affected)", model: "Samsung SSD 970 EVO Plus", want: false},
|
|
{name: "Samsung 870 (not affected)", model: "Samsung 870 QVO", want: false},
|
|
{name: "Other manufacturer", model: "WD Blue SN570", want: false},
|
|
{name: "Empty model", model: "", want: false},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
if got := hasKnownFirmwareBug(tc.model); got != tc.want {
|
|
t.Fatalf("hasKnownFirmwareBug(%q) = %v, want %v", tc.model, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCheckDiskHealthSkipsSamsung980FalseAlerts(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
// Samsung 980 reporting FAILED health (firmware bug) but actually healthy
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/nvme0n1",
|
|
Model: "Samsung SSD 980 1TB",
|
|
Serial: "S649NF0R123456",
|
|
Type: "nvme",
|
|
Health: "FAILED", // False report due to firmware bug
|
|
Wearout: 99, // Drive is actually healthy with 99% life remaining
|
|
Size: 1000204886016,
|
|
}
|
|
|
|
// Should not create an alert for health status
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
healthAlertID := "disk-health-test-instance-pve-node1-/dev/nvme0n1"
|
|
if _, exists := m.activeAlerts[healthAlertID]; exists {
|
|
m.mu.RUnlock()
|
|
t.Fatalf("expected no health alert for Samsung 980 with known firmware bug")
|
|
}
|
|
m.mu.RUnlock()
|
|
|
|
// Now test that wearout alerts still work for these drives
|
|
disk.Wearout = 5 // Low wearout should still trigger alert
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
wearoutAlertID := "disk-wearout-test-instance-pve-node1-/dev/nvme0n1"
|
|
if _, exists := m.activeAlerts[wearoutAlertID]; !exists {
|
|
m.mu.RUnlock()
|
|
t.Fatalf("expected wearout alert to still work for Samsung 980")
|
|
}
|
|
m.mu.RUnlock()
|
|
}
|
|
|
|
func TestCheckDiskHealthClearsExistingSamsung980Alerts(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/nvme0n1",
|
|
Model: "Samsung SSD 990 PRO 2TB",
|
|
Serial: "S6Z0NF0R654321",
|
|
Type: "nvme",
|
|
Health: "FAILED",
|
|
Wearout: 98,
|
|
Size: 2000398934016,
|
|
}
|
|
|
|
alertID := "disk-health-test-instance-pve-node1-/dev/nvme0n1"
|
|
|
|
// Manually create an existing alert (simulating alert from before the fix)
|
|
m.mu.Lock()
|
|
m.activeAlerts[alertID] = &Alert{
|
|
ID: alertID,
|
|
Type: "disk-health",
|
|
Level: AlertLevelCritical,
|
|
ResourceID: "pve-node1-/dev/nvme0n1",
|
|
ResourceName: "Samsung SSD 990 PRO 2TB (/dev/nvme0n1)",
|
|
Node: "pve-node1",
|
|
Instance: "test-instance",
|
|
Message: "Disk health check failed: FAILED",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// Check disk health - should clear the existing false alert
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatalf("expected existing Samsung 990 health alert to be cleared")
|
|
}
|
|
}
|
|
|
|
func TestCheckDiskHealthHealthyDiskNoAlert(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
// Non-Samsung disk with PASSED health should not create alert
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/sda",
|
|
Model: "Western Digital WD40EFZX",
|
|
Serial: "WD-WCC4E0123456",
|
|
Type: "hdd",
|
|
Health: "PASSED",
|
|
Wearout: 0, // N/A for HDD
|
|
Size: 4000787030016,
|
|
}
|
|
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
healthAlertID := "disk-health-test-instance-pve-node1-/dev/sda"
|
|
if _, exists := m.activeAlerts[healthAlertID]; exists {
|
|
m.mu.RUnlock()
|
|
t.Fatalf("expected no health alert for healthy disk with PASSED status")
|
|
}
|
|
m.mu.RUnlock()
|
|
|
|
// Also test with "OK" status
|
|
disk.Health = "OK"
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
if _, exists := m.activeAlerts[healthAlertID]; exists {
|
|
t.Fatalf("expected no health alert for healthy disk with OK status")
|
|
}
|
|
}
|
|
|
|
func TestCheckDiskHealthFailedDiskCreatesAlert(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
// Non-Samsung disk with FAILED health should create alert
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/sdb",
|
|
Model: "Seagate ST2000DM008",
|
|
Serial: "ZA123456",
|
|
Type: "hdd",
|
|
Health: "FAILED",
|
|
Wearout: 0,
|
|
Size: 2000398934016,
|
|
}
|
|
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
healthAlertID := "disk-health-test-instance-pve-node1-/dev/sdb"
|
|
alert, exists := m.activeAlerts[healthAlertID]
|
|
if !exists {
|
|
t.Fatalf("expected health alert to be created for failed disk")
|
|
}
|
|
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical alert level, got %s", alert.Level)
|
|
}
|
|
if alert.Type != "disk-health" {
|
|
t.Errorf("expected type disk-health, got %s", alert.Type)
|
|
}
|
|
if alert.Node != "pve-node1" {
|
|
t.Errorf("expected node pve-node1, got %s", alert.Node)
|
|
}
|
|
if alert.Instance != "test-instance" {
|
|
t.Errorf("expected instance test-instance, got %s", alert.Instance)
|
|
}
|
|
}
|
|
|
|
func TestCheckDiskHealthRecoveryAlertCleared(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/sdc",
|
|
Model: "Intel SSDSC2BB480G4",
|
|
Serial: "BTWL123456789",
|
|
Type: "ssd",
|
|
Health: "FAILED",
|
|
Wearout: 50,
|
|
Size: 480103981056,
|
|
}
|
|
|
|
// First check creates alert
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
healthAlertID := "disk-health-test-instance-pve-node1-/dev/sdc"
|
|
m.mu.RLock()
|
|
if _, exists := m.activeAlerts[healthAlertID]; !exists {
|
|
m.mu.RUnlock()
|
|
t.Fatalf("expected health alert to be created")
|
|
}
|
|
m.mu.RUnlock()
|
|
|
|
// Disk health recovers
|
|
disk.Health = "PASSED"
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
if _, exists := m.activeAlerts[healthAlertID]; exists {
|
|
t.Fatalf("expected health alert to be cleared after recovery")
|
|
}
|
|
}
|
|
|
|
func TestCheckDiskHealthLowWearoutCreatesAlert(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
// SSD with low wearout (less than 10% life remaining)
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/nvme1n1",
|
|
Model: "Crucial CT1000MX500",
|
|
Serial: "12345678ABCD",
|
|
Type: "nvme",
|
|
Health: "PASSED",
|
|
Wearout: 5, // Only 5% life remaining
|
|
Size: 1000204886016,
|
|
}
|
|
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
wearoutAlertID := "disk-wearout-test-instance-pve-node1-/dev/nvme1n1"
|
|
alert, exists := m.activeAlerts[wearoutAlertID]
|
|
if !exists {
|
|
t.Fatalf("expected wearout alert to be created for disk with low life remaining")
|
|
}
|
|
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning alert level, got %s", alert.Level)
|
|
}
|
|
if alert.Type != "disk-wearout" {
|
|
t.Errorf("expected type disk-wearout, got %s", alert.Type)
|
|
}
|
|
if alert.Value != 5 {
|
|
t.Errorf("expected value 5, got %f", alert.Value)
|
|
}
|
|
if alert.Threshold != 10.0 {
|
|
t.Errorf("expected threshold 10.0, got %f", alert.Threshold)
|
|
}
|
|
}
|
|
|
|
func TestCheckDiskHealthWearoutAlertUpdatesOnSubsequentChecks(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/nvme2n1",
|
|
Model: "Kingston SA2000M8",
|
|
Serial: "50026B768A123456",
|
|
Type: "nvme",
|
|
Health: "PASSED",
|
|
Wearout: 8,
|
|
Size: 500107862016,
|
|
}
|
|
|
|
// First check creates alert
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
wearoutAlertID := "disk-wearout-test-instance-pve-node1-/dev/nvme2n1"
|
|
m.mu.RLock()
|
|
alert, exists := m.activeAlerts[wearoutAlertID]
|
|
if !exists {
|
|
m.mu.RUnlock()
|
|
t.Fatalf("expected wearout alert to be created")
|
|
}
|
|
firstLastSeen := alert.LastSeen
|
|
m.mu.RUnlock()
|
|
|
|
// Wait a moment to ensure time difference
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
// Wearout decreases further
|
|
disk.Wearout = 6
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
alert, exists = m.activeAlerts[wearoutAlertID]
|
|
if !exists {
|
|
t.Fatalf("expected wearout alert to still exist")
|
|
}
|
|
|
|
if !alert.LastSeen.After(firstLastSeen) {
|
|
t.Errorf("expected LastSeen to be updated, got %v (original: %v)", alert.LastSeen, firstLastSeen)
|
|
}
|
|
if alert.Value != 6 {
|
|
t.Errorf("expected value to be updated to 6, got %f", alert.Value)
|
|
}
|
|
}
|
|
|
|
func TestCheckDiskHealthWearoutRecoveryAlertCleared(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/sdd",
|
|
Model: "ADATA SU800",
|
|
Serial: "2J012345678",
|
|
Type: "ssd",
|
|
Health: "PASSED",
|
|
Wearout: 5,
|
|
Size: 256060514304,
|
|
}
|
|
|
|
// First check creates wearout alert
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
wearoutAlertID := "disk-wearout-test-instance-pve-node1-/dev/sdd"
|
|
m.mu.RLock()
|
|
if _, exists := m.activeAlerts[wearoutAlertID]; !exists {
|
|
m.mu.RUnlock()
|
|
t.Fatalf("expected wearout alert to be created")
|
|
}
|
|
m.mu.RUnlock()
|
|
|
|
// Wearout recovers (replaced drive, or misread corrected)
|
|
disk.Wearout = 95
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
if _, exists := m.activeAlerts[wearoutAlertID]; exists {
|
|
t.Fatalf("expected wearout alert to be cleared after recovery")
|
|
}
|
|
}
|
|
|
|
func TestCheckDiskHealthEmptyOrUnknownHealthNoAlert(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
disk := proxmox.Disk{
|
|
DevPath: "/dev/sde",
|
|
Model: "Generic USB Storage",
|
|
Serial: "USB123456",
|
|
Type: "hdd",
|
|
Health: "", // Empty health - SMART not supported
|
|
Wearout: 0,
|
|
Size: 128043712512,
|
|
}
|
|
|
|
healthAlertID := "disk-health-test-instance-pve-node1-/dev/sde"
|
|
|
|
// Empty health should not create alert
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
if _, exists := m.activeAlerts[healthAlertID]; exists {
|
|
m.mu.RUnlock()
|
|
t.Fatalf("expected no health alert for disk with empty health status")
|
|
}
|
|
m.mu.RUnlock()
|
|
|
|
// UNKNOWN health should not create alert
|
|
disk.Health = "UNKNOWN"
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
if _, exists := m.activeAlerts[healthAlertID]; exists {
|
|
m.mu.RUnlock()
|
|
t.Fatalf("expected no health alert for disk with UNKNOWN health status")
|
|
}
|
|
m.mu.RUnlock()
|
|
|
|
// Lowercase "unknown" should also not create alert (normalized to uppercase)
|
|
disk.Health = "unknown"
|
|
m.CheckDiskHealth("test-instance", "pve-node1", disk)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
if _, exists := m.activeAlerts[healthAlertID]; exists {
|
|
t.Fatalf("expected no health alert for disk with lowercase unknown health status")
|
|
}
|
|
}
|
|
|
|
func TestDisableAllStorageClearsExistingAlerts(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
storageID := "local-lvm"
|
|
|
|
// Start with configuration that allows storage alerts
|
|
initialConfig := AlertConfig{
|
|
Enabled: true,
|
|
DisableAllStorage: false,
|
|
StorageDefault: HysteresisThreshold{Trigger: 80, Clear: 75},
|
|
TimeThreshold: 0,
|
|
TimeThresholds: map[string]int{},
|
|
NodeDefaults: ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
|
|
Memory: &HysteresisThreshold{Trigger: 85, Clear: 80},
|
|
Disk: &HysteresisThreshold{Trigger: 90, Clear: 85},
|
|
},
|
|
GuestDefaults: ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
|
|
},
|
|
Overrides: make(map[string]ThresholdConfig),
|
|
}
|
|
m.UpdateConfig(initialConfig)
|
|
m.mu.Lock()
|
|
m.config.TimeThreshold = 0
|
|
m.config.TimeThresholds = map[string]int{}
|
|
m.config.ActivationState = ActivationActive
|
|
m.mu.Unlock()
|
|
|
|
var dispatched []*Alert
|
|
done := make(chan struct{}, 1)
|
|
var resolved []string
|
|
resolvedDone := make(chan struct{}, 1)
|
|
m.SetAlertCallback(func(alert *Alert) {
|
|
dispatched = append(dispatched, alert)
|
|
select {
|
|
case done <- struct{}{}:
|
|
default:
|
|
}
|
|
})
|
|
m.SetResolvedCallback(func(alertID string) {
|
|
resolved = append(resolved, alertID)
|
|
select {
|
|
case resolvedDone <- struct{}{}:
|
|
default:
|
|
}
|
|
})
|
|
|
|
storage := models.Storage{
|
|
ID: storageID,
|
|
Name: "local-lvm",
|
|
Usage: 90.0,
|
|
Status: "available",
|
|
}
|
|
|
|
// Initial check should trigger an alert
|
|
m.CheckStorage(storage)
|
|
select {
|
|
case <-done:
|
|
case <-time.After(100 * time.Millisecond):
|
|
t.Fatalf("did not receive initial alert dispatch")
|
|
}
|
|
if len(dispatched) != 1 {
|
|
t.Fatalf("expected 1 alert before disabling storage, got %d", len(dispatched))
|
|
}
|
|
|
|
// Apply config with DisableAllStorage enabled
|
|
disabledConfig := initialConfig
|
|
disabledConfig.DisableAllStorage = true
|
|
m.UpdateConfig(disabledConfig)
|
|
m.mu.Lock()
|
|
m.config.TimeThreshold = 0
|
|
m.config.TimeThresholds = map[string]int{}
|
|
m.config.ActivationState = ActivationActive
|
|
m.mu.Unlock()
|
|
|
|
// Clear dispatched slice to capture only post-disable notifications
|
|
dispatched = dispatched[:0]
|
|
done = make(chan struct{}, 1)
|
|
|
|
// Re-run CheckStorage with high usage; no alert should be dispatched
|
|
m.CheckStorage(storage)
|
|
select {
|
|
case <-done:
|
|
t.Fatalf("expected no alerts after disabling all storage, but callback fired")
|
|
case <-time.After(100 * time.Millisecond):
|
|
// No callback fired as expected
|
|
}
|
|
|
|
// Active alerts should be cleared by reevaluateActiveAlertsLocked
|
|
m.mu.RLock()
|
|
activeCount := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
if activeCount != 0 {
|
|
t.Fatalf("expected active alerts to be cleared after disabling all storage, got %d", activeCount)
|
|
}
|
|
|
|
// Resolved callback should have fired
|
|
select {
|
|
case <-resolvedDone:
|
|
case <-time.After(100 * time.Millisecond):
|
|
t.Fatalf("expected resolved callback to fire after disabling all storage")
|
|
}
|
|
expectedAlertID := fmt.Sprintf("%s-usage", storageID)
|
|
if len(resolved) != 1 || resolved[0] != expectedAlertID {
|
|
t.Fatalf("expected resolved callback for %s, got %v", expectedAlertID, resolved)
|
|
}
|
|
|
|
// Pending alert should be cleared
|
|
m.mu.RLock()
|
|
_, isPending := m.pendingAlerts[expectedAlertID]
|
|
m.mu.RUnlock()
|
|
if isPending {
|
|
t.Fatalf("expected pending alert entry to be cleared after disabling all storage")
|
|
}
|
|
}
|
|
|
|
func TestUpdateConfigPreservesZeroDockerThresholds(t *testing.T) {
|
|
t.Helper()
|
|
|
|
m := newTestManager(t)
|
|
config := m.GetConfig()
|
|
config.DockerDefaults.Memory = HysteresisThreshold{Trigger: 0, Clear: 0}
|
|
|
|
m.UpdateConfig(config)
|
|
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
if m.config.DockerDefaults.Memory.Trigger != 0 {
|
|
t.Fatalf("expected docker memory trigger to remain 0 when disabled, got %.1f", m.config.DockerDefaults.Memory.Trigger)
|
|
}
|
|
if m.config.DockerDefaults.Memory.Clear != 0 {
|
|
t.Fatalf("expected docker memory clear to remain 0 when disabled, got %.1f", m.config.DockerDefaults.Memory.Clear)
|
|
}
|
|
}
|
|
|
|
func TestReevaluateClearsDockerContainerAlertWhenOverrideDisabled(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
resourceID := "docker:host-1/container-1"
|
|
alertID := resourceID + "-memory"
|
|
|
|
resolved := make(chan string, 1)
|
|
m.SetResolvedCallback(func(id string) {
|
|
resolved <- id
|
|
})
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts[alertID] = &Alert{
|
|
ID: alertID,
|
|
Type: "memory",
|
|
ResourceID: resourceID,
|
|
ResourceName: "qbittorrent",
|
|
Instance: "Docker",
|
|
Metadata: map[string]interface{}{
|
|
"resourceType": "Docker Container",
|
|
},
|
|
Threshold: 80,
|
|
Value: 90,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
config := m.GetConfig()
|
|
config.Overrides = map[string]ThresholdConfig{
|
|
resourceID: {
|
|
Disabled: true,
|
|
},
|
|
}
|
|
config.ActivationState = ActivationActive
|
|
|
|
m.UpdateConfig(config)
|
|
|
|
select {
|
|
case got := <-resolved:
|
|
if got != alertID {
|
|
t.Fatalf("resolved callback fired for unexpected alert %s", got)
|
|
}
|
|
case <-time.After(200 * time.Millisecond):
|
|
t.Fatalf("expected alert to be resolved when docker container override is disabled")
|
|
}
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts[alertID]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Fatalf("expected docker container alert to be cleared when override is disabled")
|
|
}
|
|
}
|
|
|
|
func TestReevaluateClearsDockerContainerAlertWhenIgnoredPrefixAdded(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
resourceID := "docker:host-2/container-abc123"
|
|
alertID := resourceID + "-cpu"
|
|
|
|
resolved := make(chan string, 1)
|
|
m.SetResolvedCallback(func(id string) {
|
|
resolved <- id
|
|
})
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts[alertID] = &Alert{
|
|
ID: alertID,
|
|
Type: "cpu",
|
|
ResourceID: resourceID,
|
|
ResourceName: "qbittorrentvpn",
|
|
Instance: "Docker",
|
|
Metadata: map[string]interface{}{
|
|
"resourceType": "Docker Container",
|
|
"containerId": "abc123",
|
|
"containerName": "qbittorrentvpn",
|
|
},
|
|
Threshold: 80,
|
|
Value: 95,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
config := m.GetConfig()
|
|
config.DockerIgnoredContainerPrefixes = []string{"qbit"}
|
|
config.ActivationState = ActivationActive
|
|
|
|
m.UpdateConfig(config)
|
|
|
|
select {
|
|
case got := <-resolved:
|
|
if got != alertID {
|
|
t.Fatalf("resolved callback fired for unexpected alert %s", got)
|
|
}
|
|
case <-time.After(200 * time.Millisecond):
|
|
t.Fatalf("expected alert to be resolved after adding ignored prefix")
|
|
}
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts[alertID]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Fatalf("expected docker container alert to be cleared when ignored prefix is configured")
|
|
}
|
|
}
|
|
|
|
func TestBuildGuestKey(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
instance string
|
|
node string
|
|
vmid int
|
|
want string
|
|
}{
|
|
{
|
|
name: "different instance and node",
|
|
instance: "cluster-1",
|
|
node: "pve-node",
|
|
vmid: 100,
|
|
want: "cluster-1:pve-node:100",
|
|
},
|
|
{
|
|
name: "same instance and node",
|
|
instance: "pve-node",
|
|
node: "pve-node",
|
|
vmid: 200,
|
|
want: "pve-node:pve-node:200",
|
|
},
|
|
{
|
|
name: "empty instance uses node",
|
|
instance: "",
|
|
node: "pve-node",
|
|
vmid: 300,
|
|
want: "pve-node:pve-node:300",
|
|
},
|
|
{
|
|
name: "whitespace instance uses node",
|
|
instance: " ",
|
|
node: "pve-node",
|
|
vmid: 400,
|
|
want: "pve-node:pve-node:400",
|
|
},
|
|
{
|
|
name: "instance with whitespace trimmed",
|
|
instance: " cluster-1 ",
|
|
node: "pve-node",
|
|
vmid: 500,
|
|
want: "cluster-1:pve-node:500",
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
got := BuildGuestKey(tt.instance, tt.node, tt.vmid)
|
|
if got != tt.want {
|
|
t.Errorf("BuildGuestKey(%q, %q, %d) = %q, want %q", tt.instance, tt.node, tt.vmid, got, tt.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCheckFlapping(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
flappingEnabled bool
|
|
threshold int
|
|
windowSeconds int
|
|
cooldownMinutes int
|
|
historyEntries int // number of state changes to simulate before the test call
|
|
expectFlapping bool
|
|
expectNewFlapping bool // should this trigger a new flapping detection (vs already flapping)
|
|
}{
|
|
{
|
|
name: "disabled returns false",
|
|
flappingEnabled: false,
|
|
threshold: 5,
|
|
windowSeconds: 300,
|
|
historyEntries: 10, // way over threshold
|
|
expectFlapping: false,
|
|
},
|
|
{
|
|
name: "below threshold returns false",
|
|
flappingEnabled: true,
|
|
threshold: 5,
|
|
windowSeconds: 300,
|
|
historyEntries: 2, // only 2 + 1 (test call) = 3 < 5
|
|
expectFlapping: false,
|
|
},
|
|
{
|
|
name: "at threshold triggers new flapping",
|
|
flappingEnabled: true,
|
|
threshold: 5,
|
|
windowSeconds: 300,
|
|
cooldownMinutes: 15,
|
|
historyEntries: 4, // 4 + 1 (test call) = 5 == threshold
|
|
expectFlapping: true,
|
|
expectNewFlapping: true,
|
|
},
|
|
{
|
|
name: "above threshold triggers flapping",
|
|
flappingEnabled: true,
|
|
threshold: 5,
|
|
windowSeconds: 300,
|
|
cooldownMinutes: 15,
|
|
historyEntries: 6, // 6 + 1 = 7 > 5
|
|
expectFlapping: true,
|
|
expectNewFlapping: true,
|
|
},
|
|
{
|
|
name: "single state change below threshold",
|
|
flappingEnabled: true,
|
|
threshold: 5,
|
|
windowSeconds: 300,
|
|
historyEntries: 0, // only the test call = 1 < 5
|
|
expectFlapping: false,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
|
|
// Configure flapping settings
|
|
m.mu.Lock()
|
|
m.config.FlappingEnabled = tt.flappingEnabled
|
|
m.config.FlappingThreshold = tt.threshold
|
|
m.config.FlappingWindowSeconds = tt.windowSeconds
|
|
m.config.FlappingCooldownMinutes = tt.cooldownMinutes
|
|
|
|
alertID := "test-alert-" + tt.name
|
|
|
|
// Add history entries within the time window
|
|
now := time.Now()
|
|
for i := 0; i < tt.historyEntries; i++ {
|
|
m.flappingHistory[alertID] = append(m.flappingHistory[alertID], now.Add(-time.Duration(i)*time.Second))
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// Call checkFlappingLocked
|
|
m.mu.Lock()
|
|
result := m.checkFlappingLocked(alertID)
|
|
m.mu.Unlock()
|
|
|
|
if result != tt.expectFlapping {
|
|
t.Errorf("checkFlappingLocked() = %v, want %v", result, tt.expectFlapping)
|
|
}
|
|
|
|
// Check if flapping was newly detected
|
|
m.mu.RLock()
|
|
isFlappingActive := m.flappingActive[alertID]
|
|
_, hasSuppression := m.suppressedUntil[alertID]
|
|
m.mu.RUnlock()
|
|
|
|
if tt.expectNewFlapping {
|
|
if !isFlappingActive {
|
|
t.Errorf("expected flappingActive[%s] to be true", alertID)
|
|
}
|
|
if !hasSuppression {
|
|
t.Errorf("expected suppressedUntil[%s] to be set", alertID)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCheckFlappingAlreadyFlapping(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
|
|
alertID := "already-flapping-alert"
|
|
|
|
m.mu.Lock()
|
|
m.config.FlappingEnabled = true
|
|
m.config.FlappingThreshold = 3
|
|
m.config.FlappingWindowSeconds = 300
|
|
m.config.FlappingCooldownMinutes = 15
|
|
|
|
// Pre-set flapping state
|
|
m.flappingActive[alertID] = true
|
|
existingSuppression := time.Now().Add(10 * time.Minute)
|
|
m.suppressedUntil[alertID] = existingSuppression
|
|
|
|
// Add history to exceed threshold
|
|
now := time.Now()
|
|
m.flappingHistory[alertID] = []time.Time{
|
|
now.Add(-10 * time.Second),
|
|
now.Add(-5 * time.Second),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// Call checkFlappingLocked - should return true but NOT update suppression
|
|
m.mu.Lock()
|
|
result := m.checkFlappingLocked(alertID)
|
|
m.mu.Unlock()
|
|
|
|
if !result {
|
|
t.Errorf("checkFlappingLocked() = false, want true for already flapping alert")
|
|
}
|
|
|
|
// Verify suppression time was NOT updated (existing suppression should remain)
|
|
m.mu.RLock()
|
|
currentSuppression := m.suppressedUntil[alertID]
|
|
m.mu.RUnlock()
|
|
|
|
if !currentSuppression.Equal(existingSuppression) {
|
|
t.Errorf("suppressedUntil was updated from %v to %v; should remain unchanged for already-flapping alert",
|
|
existingSuppression, currentSuppression)
|
|
}
|
|
}
|
|
|
|
func TestCheckFlappingWindowExpiry(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
|
|
alertID := "window-expiry-alert"
|
|
|
|
m.mu.Lock()
|
|
m.config.FlappingEnabled = true
|
|
m.config.FlappingThreshold = 3
|
|
m.config.FlappingWindowSeconds = 60 // 1 minute window
|
|
|
|
// Add old history entries outside the window
|
|
now := time.Now()
|
|
m.flappingHistory[alertID] = []time.Time{
|
|
now.Add(-5 * time.Minute), // outside 1 minute window
|
|
now.Add(-4 * time.Minute), // outside 1 minute window
|
|
now.Add(-3 * time.Minute), // outside 1 minute window
|
|
now.Add(-2 * time.Minute), // outside 1 minute window
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// Call checkFlappingLocked - old entries should be pruned
|
|
m.mu.Lock()
|
|
result := m.checkFlappingLocked(alertID)
|
|
historyLen := len(m.flappingHistory[alertID])
|
|
m.mu.Unlock()
|
|
|
|
if result {
|
|
t.Errorf("checkFlappingLocked() = true, want false (old entries should be pruned)")
|
|
}
|
|
|
|
// Only the current call should remain in history
|
|
if historyLen != 1 {
|
|
t.Errorf("history length = %d, want 1 (old entries should be pruned)", historyLen)
|
|
}
|
|
}
|
|
|
|
func TestGetGlobalMetricTimeThreshold(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
metricTimeThresholds map[string]map[string]int
|
|
metricType string
|
|
wantDelay int
|
|
wantFound bool
|
|
}{
|
|
{
|
|
name: "empty MetricTimeThresholds returns false",
|
|
metricTimeThresholds: nil,
|
|
metricType: "cpu",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "no all key returns false",
|
|
metricTimeThresholds: map[string]map[string]int{"specific": {"cpu": 60}},
|
|
metricType: "cpu",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "empty all map returns false",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {}},
|
|
metricType: "cpu",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "empty metricType returns false",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"cpu": 60}},
|
|
metricType: "",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "whitespace metricType returns false",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"cpu": 60}},
|
|
metricType: " ",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "direct metric match",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"cpu": 120, "memory": 90}},
|
|
metricType: "cpu",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "metric match case insensitive",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"cpu": 120}},
|
|
metricType: "CPU",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "metric match with whitespace",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"cpu": 120}},
|
|
metricType: " cpu ",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "default fallback",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"default": 30}},
|
|
metricType: "unknown",
|
|
wantDelay: 30,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "_default fallback",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"_default": 45}},
|
|
metricType: "unknown",
|
|
wantDelay: 45,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "wildcard fallback",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"*": 15}},
|
|
metricType: "unknown",
|
|
wantDelay: 15,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "direct match takes precedence over default",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"cpu": 120, "default": 30}},
|
|
metricType: "cpu",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "no match and no fallback returns false",
|
|
metricTimeThresholds: map[string]map[string]int{"all": {"cpu": 120, "memory": 90}},
|
|
metricType: "disk",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.MetricTimeThresholds = tt.metricTimeThresholds
|
|
m.mu.Unlock()
|
|
|
|
m.mu.RLock()
|
|
gotDelay, gotFound := m.getGlobalMetricTimeThreshold(tt.metricType)
|
|
m.mu.RUnlock()
|
|
|
|
if gotDelay != tt.wantDelay {
|
|
t.Errorf("getGlobalMetricTimeThreshold() delay = %d, want %d", gotDelay, tt.wantDelay)
|
|
}
|
|
if gotFound != tt.wantFound {
|
|
t.Errorf("getGlobalMetricTimeThreshold() found = %v, want %v", gotFound, tt.wantFound)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestGetBaseTimeThreshold(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
timeThresholds map[string]int
|
|
timeThreshold int // global fallback
|
|
resourceType string
|
|
wantDelay int
|
|
wantFound bool
|
|
}{
|
|
{
|
|
name: "nil TimeThresholds returns global TimeThreshold",
|
|
timeThresholds: nil,
|
|
timeThreshold: 60,
|
|
resourceType: "guest",
|
|
wantDelay: 60,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "direct resource type match",
|
|
timeThresholds: map[string]int{"guest": 120, "node": 90},
|
|
timeThreshold: 60,
|
|
resourceType: "guest",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "canonical key match for vm",
|
|
timeThresholds: map[string]int{"guest": 120},
|
|
timeThreshold: 60,
|
|
resourceType: "vm",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "canonical key match for container",
|
|
timeThresholds: map[string]int{"guest": 120},
|
|
timeThreshold: 60,
|
|
resourceType: "container",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "all fallback when no specific match",
|
|
timeThresholds: map[string]int{"all": 45},
|
|
timeThreshold: 60,
|
|
resourceType: "storage",
|
|
wantDelay: 45,
|
|
wantFound: false, // "all" returns found=false
|
|
},
|
|
{
|
|
name: "specific match takes precedence over all",
|
|
timeThresholds: map[string]int{"storage": 30, "all": 45},
|
|
timeThreshold: 60,
|
|
resourceType: "storage",
|
|
wantDelay: 30,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "no match and no all returns global threshold",
|
|
timeThresholds: map[string]int{"guest": 120},
|
|
timeThreshold: 60,
|
|
resourceType: "storage",
|
|
wantDelay: 60,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "empty TimeThresholds returns global threshold",
|
|
timeThresholds: map[string]int{},
|
|
timeThreshold: 60,
|
|
resourceType: "guest",
|
|
wantDelay: 60,
|
|
wantFound: false,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.TimeThresholds = tt.timeThresholds
|
|
m.config.TimeThreshold = tt.timeThreshold
|
|
m.mu.Unlock()
|
|
|
|
m.mu.RLock()
|
|
gotDelay, gotFound := m.getBaseTimeThreshold(tt.resourceType)
|
|
m.mu.RUnlock()
|
|
|
|
if gotDelay != tt.wantDelay {
|
|
t.Errorf("getBaseTimeThreshold() delay = %d, want %d", gotDelay, tt.wantDelay)
|
|
}
|
|
if gotFound != tt.wantFound {
|
|
t.Errorf("getBaseTimeThreshold() found = %v, want %v", gotFound, tt.wantFound)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestGetMetricTimeThreshold(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
metricTimeThresholds map[string]map[string]int
|
|
resourceType string
|
|
metricType string
|
|
wantDelay int
|
|
wantFound bool
|
|
}{
|
|
{
|
|
name: "empty MetricTimeThresholds returns false",
|
|
metricTimeThresholds: nil,
|
|
resourceType: "guest",
|
|
metricType: "cpu",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "empty metricType returns false",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"cpu": 60}},
|
|
resourceType: "guest",
|
|
metricType: "",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "whitespace metricType returns false",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"cpu": 60}},
|
|
resourceType: "guest",
|
|
metricType: " ",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "direct match on resourceType and metricType",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"cpu": 120, "memory": 90}},
|
|
resourceType: "guest",
|
|
metricType: "cpu",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "canonical key match vm to guest",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"cpu": 120}},
|
|
resourceType: "vm",
|
|
metricType: "cpu",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "canonical key match container to guest",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"memory": 90}},
|
|
resourceType: "container",
|
|
metricType: "memory",
|
|
wantDelay: 90,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "default fallback within resourceType",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"default": 30}},
|
|
resourceType: "guest",
|
|
metricType: "unknown",
|
|
wantDelay: 30,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "_default fallback within resourceType",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"_default": 45}},
|
|
resourceType: "guest",
|
|
metricType: "unknown",
|
|
wantDelay: 45,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "wildcard fallback within resourceType",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"*": 15}},
|
|
resourceType: "guest",
|
|
metricType: "unknown",
|
|
wantDelay: 15,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "direct match takes precedence over default",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"cpu": 120, "default": 30}},
|
|
resourceType: "guest",
|
|
metricType: "cpu",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
{
|
|
name: "no match for resourceType returns false",
|
|
metricTimeThresholds: map[string]map[string]int{"node": {"cpu": 60}},
|
|
resourceType: "guest",
|
|
metricType: "cpu",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "empty perType map skipped",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {}},
|
|
resourceType: "guest",
|
|
metricType: "cpu",
|
|
wantDelay: 0,
|
|
wantFound: false,
|
|
},
|
|
{
|
|
name: "metricType case insensitive",
|
|
metricTimeThresholds: map[string]map[string]int{"guest": {"cpu": 120}},
|
|
resourceType: "guest",
|
|
metricType: "CPU",
|
|
wantDelay: 120,
|
|
wantFound: true,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.MetricTimeThresholds = tt.metricTimeThresholds
|
|
m.mu.Unlock()
|
|
|
|
m.mu.RLock()
|
|
gotDelay, gotFound := m.getMetricTimeThreshold(tt.resourceType, tt.metricType)
|
|
m.mu.RUnlock()
|
|
|
|
if gotDelay != tt.wantDelay {
|
|
t.Errorf("getMetricTimeThreshold() delay = %d, want %d", gotDelay, tt.wantDelay)
|
|
}
|
|
if gotFound != tt.wantFound {
|
|
t.Errorf("getMetricTimeThreshold() found = %v, want %v", gotFound, tt.wantFound)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCheckRateLimit(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("no rate limit when MaxAlertsHour is zero", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.MaxAlertsHour = 0
|
|
m.mu.Unlock()
|
|
|
|
m.mu.Lock()
|
|
result := m.checkRateLimit("test-alert")
|
|
m.mu.Unlock()
|
|
|
|
if !result {
|
|
t.Errorf("checkRateLimit() = false, want true when MaxAlertsHour is 0")
|
|
}
|
|
})
|
|
|
|
t.Run("no rate limit when MaxAlertsHour is negative", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.MaxAlertsHour = -1
|
|
m.mu.Unlock()
|
|
|
|
m.mu.Lock()
|
|
result := m.checkRateLimit("test-alert")
|
|
m.mu.Unlock()
|
|
|
|
if !result {
|
|
t.Errorf("checkRateLimit() = false, want true when MaxAlertsHour is negative")
|
|
}
|
|
})
|
|
|
|
t.Run("allows alerts under rate limit", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.MaxAlertsHour = 5
|
|
m.mu.Unlock()
|
|
|
|
// First 5 alerts should be allowed
|
|
for i := 0; i < 5; i++ {
|
|
m.mu.Lock()
|
|
result := m.checkRateLimit("test-alert")
|
|
m.mu.Unlock()
|
|
|
|
if !result {
|
|
t.Errorf("checkRateLimit() call %d = false, want true (under limit)", i+1)
|
|
}
|
|
}
|
|
})
|
|
|
|
t.Run("blocks alerts at rate limit", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.MaxAlertsHour = 3
|
|
m.mu.Unlock()
|
|
|
|
// Use up the rate limit
|
|
for i := 0; i < 3; i++ {
|
|
m.mu.Lock()
|
|
_ = m.checkRateLimit("test-alert")
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// Fourth alert should be blocked
|
|
m.mu.Lock()
|
|
result := m.checkRateLimit("test-alert")
|
|
m.mu.Unlock()
|
|
|
|
if result {
|
|
t.Errorf("checkRateLimit() = true, want false (at rate limit)")
|
|
}
|
|
})
|
|
|
|
t.Run("different alert IDs have separate limits", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.MaxAlertsHour = 2
|
|
m.mu.Unlock()
|
|
|
|
// Use up limit for alert-1
|
|
for i := 0; i < 2; i++ {
|
|
m.mu.Lock()
|
|
_ = m.checkRateLimit("alert-1")
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// alert-2 should still be allowed
|
|
m.mu.Lock()
|
|
result := m.checkRateLimit("alert-2")
|
|
m.mu.Unlock()
|
|
|
|
if !result {
|
|
t.Errorf("checkRateLimit(alert-2) = false, want true (separate limit)")
|
|
}
|
|
})
|
|
|
|
t.Run("old entries are cleaned up", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.MaxAlertsHour = 2
|
|
|
|
// Pre-populate with old entries (more than 1 hour ago)
|
|
oldTime := time.Now().Add(-2 * time.Hour)
|
|
m.alertRateLimit["test-alert"] = []time.Time{oldTime, oldTime}
|
|
m.mu.Unlock()
|
|
|
|
// Should be allowed because old entries are cleaned up
|
|
m.mu.Lock()
|
|
result := m.checkRateLimit("test-alert")
|
|
m.mu.Unlock()
|
|
|
|
if !result {
|
|
t.Errorf("checkRateLimit() = false, want true (old entries should be cleaned)")
|
|
}
|
|
})
|
|
|
|
t.Run("mixed old and recent entries", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.MaxAlertsHour = 2
|
|
|
|
// Pre-populate with 1 old and 1 recent entry
|
|
oldTime := time.Now().Add(-2 * time.Hour)
|
|
recentTime := time.Now().Add(-30 * time.Minute)
|
|
m.alertRateLimit["test-alert"] = []time.Time{oldTime, recentTime}
|
|
m.mu.Unlock()
|
|
|
|
// First call should be allowed (1 recent + 1 new = 2)
|
|
m.mu.Lock()
|
|
result1 := m.checkRateLimit("test-alert")
|
|
m.mu.Unlock()
|
|
|
|
if !result1 {
|
|
t.Errorf("checkRateLimit() call 1 = false, want true")
|
|
}
|
|
|
|
// Second call should be blocked (2 recent + 1 new would exceed 2)
|
|
m.mu.Lock()
|
|
result2 := m.checkRateLimit("test-alert")
|
|
m.mu.Unlock()
|
|
|
|
if result2 {
|
|
t.Errorf("checkRateLimit() call 2 = true, want false (at limit)")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestApplyRelaxedGuestThresholds(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("nil thresholds get defaults", func(t *testing.T) {
|
|
// t.Parallel()
|
|
cfg := ThresholdConfig{
|
|
CPU: nil,
|
|
Memory: nil,
|
|
Disk: nil,
|
|
}
|
|
|
|
result := applyRelaxedGuestThresholds(cfg)
|
|
|
|
if result.CPU == nil {
|
|
t.Fatal("expected CPU threshold to be set")
|
|
}
|
|
if result.CPU.Trigger != 95 {
|
|
t.Errorf("CPU.Trigger = %v, want 95", result.CPU.Trigger)
|
|
}
|
|
if result.CPU.Clear != 90 {
|
|
t.Errorf("CPU.Clear = %v, want 90", result.CPU.Clear)
|
|
}
|
|
|
|
if result.Memory == nil {
|
|
t.Fatal("expected Memory threshold to be set")
|
|
}
|
|
if result.Memory.Trigger != 92 {
|
|
t.Errorf("Memory.Trigger = %v, want 92", result.Memory.Trigger)
|
|
}
|
|
|
|
if result.Disk == nil {
|
|
t.Fatal("expected Disk threshold to be set")
|
|
}
|
|
if result.Disk.Trigger != 95 {
|
|
t.Errorf("Disk.Trigger = %v, want 95", result.Disk.Trigger)
|
|
}
|
|
})
|
|
|
|
t.Run("low thresholds raised to minimum", func(t *testing.T) {
|
|
// t.Parallel()
|
|
cfg := ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 50, Clear: 45},
|
|
Memory: &HysteresisThreshold{Trigger: 60, Clear: 55},
|
|
Disk: &HysteresisThreshold{Trigger: 70, Clear: 65},
|
|
}
|
|
|
|
result := applyRelaxedGuestThresholds(cfg)
|
|
|
|
if result.CPU.Trigger != 95 {
|
|
t.Errorf("CPU.Trigger = %v, want 95 (raised to minimum)", result.CPU.Trigger)
|
|
}
|
|
if result.Memory.Trigger != 92 {
|
|
t.Errorf("Memory.Trigger = %v, want 92 (raised to minimum)", result.Memory.Trigger)
|
|
}
|
|
if result.Disk.Trigger != 95 {
|
|
t.Errorf("Disk.Trigger = %v, want 95 (raised to minimum)", result.Disk.Trigger)
|
|
}
|
|
})
|
|
|
|
t.Run("high thresholds unchanged", func(t *testing.T) {
|
|
// t.Parallel()
|
|
cfg := ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 98, Clear: 93},
|
|
Memory: &HysteresisThreshold{Trigger: 95, Clear: 90},
|
|
Disk: &HysteresisThreshold{Trigger: 99, Clear: 94},
|
|
}
|
|
|
|
result := applyRelaxedGuestThresholds(cfg)
|
|
|
|
if result.CPU.Trigger != 98 {
|
|
t.Errorf("CPU.Trigger = %v, want 98 (unchanged)", result.CPU.Trigger)
|
|
}
|
|
if result.Memory.Trigger != 95 {
|
|
t.Errorf("Memory.Trigger = %v, want 95 (unchanged)", result.Memory.Trigger)
|
|
}
|
|
if result.Disk.Trigger != 99 {
|
|
t.Errorf("Disk.Trigger = %v, want 99 (unchanged)", result.Disk.Trigger)
|
|
}
|
|
})
|
|
|
|
t.Run("clear adjusted when too close to trigger", func(t *testing.T) {
|
|
// t.Parallel()
|
|
cfg := ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 95, Clear: 96}, // Clear >= Trigger
|
|
}
|
|
|
|
result := applyRelaxedGuestThresholds(cfg)
|
|
|
|
if result.CPU.Clear >= result.CPU.Trigger {
|
|
t.Errorf("CPU.Clear = %v should be less than Trigger = %v", result.CPU.Clear, result.CPU.Trigger)
|
|
}
|
|
if result.CPU.Clear != 90 {
|
|
t.Errorf("CPU.Clear = %v, want 90 (Trigger - 5)", result.CPU.Clear)
|
|
}
|
|
})
|
|
|
|
t.Run("clear clamped at zero when it would go negative", func(t *testing.T) {
|
|
// t.Parallel()
|
|
// Create a threshold where Trigger is above min but Clear would go negative
|
|
// The adjust function sets Clear = Trigger - 5 if Clear >= Trigger
|
|
// Then clamps to 0 if Clear < 0
|
|
// Since all triggers get raised to 95/92/95, the negative clamp path
|
|
// won't be hit in normal use. Test the logic directly with a config
|
|
// that has Trigger exactly at minimum and Clear at minimum
|
|
cfg := ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 95, Clear: 3},
|
|
}
|
|
|
|
result := applyRelaxedGuestThresholds(cfg)
|
|
|
|
// Clear at 3 is valid (less than Trigger 95), should stay at 3
|
|
if result.CPU.Trigger != 95 {
|
|
t.Errorf("CPU.Trigger = %v, want 95", result.CPU.Trigger)
|
|
}
|
|
if result.CPU.Clear != 3 {
|
|
t.Errorf("CPU.Clear = %v, want 3 (unchanged since < Trigger)", result.CPU.Clear)
|
|
}
|
|
})
|
|
|
|
t.Run("original config unchanged", func(t *testing.T) {
|
|
// t.Parallel()
|
|
original := ThresholdConfig{
|
|
CPU: &HysteresisThreshold{Trigger: 50, Clear: 45},
|
|
}
|
|
|
|
_ = applyRelaxedGuestThresholds(original)
|
|
|
|
// Original should be unchanged
|
|
if original.CPU.Trigger != 50 {
|
|
t.Errorf("original CPU.Trigger = %v, want 50 (should be unchanged)", original.CPU.Trigger)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestShouldNotifyAfterCooldown(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("cooldown disabled allows notification", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.Cooldown = 0
|
|
m.mu.Unlock()
|
|
|
|
alert := &Alert{
|
|
ID: "test-alert",
|
|
LastNotified: nil,
|
|
}
|
|
|
|
if !m.shouldNotifyAfterCooldown(alert) {
|
|
t.Error("expected true when cooldown is 0")
|
|
}
|
|
})
|
|
|
|
t.Run("negative cooldown allows notification", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.Cooldown = -5
|
|
m.mu.Unlock()
|
|
|
|
now := time.Now()
|
|
alert := &Alert{
|
|
ID: "test-alert",
|
|
LastNotified: &now,
|
|
}
|
|
|
|
if !m.shouldNotifyAfterCooldown(alert) {
|
|
t.Error("expected true when cooldown is negative")
|
|
}
|
|
})
|
|
|
|
t.Run("first notification allowed when never notified", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.Cooldown = 30 // 30 minutes
|
|
m.mu.Unlock()
|
|
|
|
alert := &Alert{
|
|
ID: "test-alert",
|
|
LastNotified: nil,
|
|
}
|
|
|
|
if !m.shouldNotifyAfterCooldown(alert) {
|
|
t.Error("expected true when alert has never been notified")
|
|
}
|
|
})
|
|
|
|
t.Run("notification blocked during cooldown period", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.Cooldown = 30 // 30 minutes
|
|
m.mu.Unlock()
|
|
|
|
lastNotified := time.Now().Add(-10 * time.Minute) // Notified 10 minutes ago
|
|
alert := &Alert{
|
|
ID: "test-alert",
|
|
LastNotified: &lastNotified,
|
|
}
|
|
|
|
if m.shouldNotifyAfterCooldown(alert) {
|
|
t.Error("expected false when still in cooldown period")
|
|
}
|
|
})
|
|
|
|
t.Run("notification allowed after cooldown expires", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.Cooldown = 30 // 30 minutes
|
|
m.mu.Unlock()
|
|
|
|
lastNotified := time.Now().Add(-45 * time.Minute) // Notified 45 minutes ago
|
|
alert := &Alert{
|
|
ID: "test-alert",
|
|
LastNotified: &lastNotified,
|
|
}
|
|
|
|
if !m.shouldNotifyAfterCooldown(alert) {
|
|
t.Error("expected true after cooldown period expires")
|
|
}
|
|
})
|
|
|
|
t.Run("notification allowed at exact cooldown boundary", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Schedule.Cooldown = 30 // 30 minutes
|
|
m.mu.Unlock()
|
|
|
|
lastNotified := time.Now().Add(-30 * time.Minute) // Exactly 30 minutes ago
|
|
alert := &Alert{
|
|
ID: "test-alert",
|
|
LastNotified: &lastNotified,
|
|
}
|
|
|
|
if !m.shouldNotifyAfterCooldown(alert) {
|
|
t.Error("expected true at exact cooldown boundary (>=)")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestDockerServiceDisplayName(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
service models.DockerService
|
|
expected string
|
|
}{
|
|
{
|
|
name: "returns name when present",
|
|
service: models.DockerService{Name: "my-service", ID: "abc123456789xyz"},
|
|
expected: "my-service",
|
|
},
|
|
{
|
|
name: "returns trimmed name",
|
|
service: models.DockerService{Name: " my-service ", ID: "abc123456789xyz"},
|
|
expected: "my-service",
|
|
},
|
|
{
|
|
name: "returns truncated ID when name is empty",
|
|
service: models.DockerService{Name: "", ID: "abc123456789xyz"},
|
|
expected: "abc123456789",
|
|
},
|
|
{
|
|
name: "returns full short ID when less than 12 chars",
|
|
service: models.DockerService{Name: "", ID: "abc123"},
|
|
expected: "abc123",
|
|
},
|
|
{
|
|
name: "returns trimmed ID",
|
|
service: models.DockerService{Name: "", ID: " abc123456789xyz "},
|
|
expected: "abc123456789",
|
|
},
|
|
{
|
|
name: "returns 'service' when both name and ID empty",
|
|
service: models.DockerService{Name: "", ID: ""},
|
|
expected: "service",
|
|
},
|
|
{
|
|
name: "returns 'service' when both whitespace only",
|
|
service: models.DockerService{Name: " ", ID: " "},
|
|
expected: "service",
|
|
},
|
|
{
|
|
name: "prefers name over ID",
|
|
service: models.DockerService{Name: "preferred", ID: "not-this-id"},
|
|
expected: "preferred",
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
result := dockerServiceDisplayName(tt.service)
|
|
if result != tt.expected {
|
|
t.Errorf("dockerServiceDisplayName(%+v) = %q, want %q", tt.service, result, tt.expected)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestDockerServiceResourceID(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
tests := []struct {
|
|
name string
|
|
hostID string
|
|
serviceID string
|
|
serviceName string
|
|
expected string
|
|
}{
|
|
{
|
|
name: "with host and service ID",
|
|
hostID: "host-1",
|
|
serviceID: "svc-123",
|
|
serviceName: "my-service",
|
|
expected: "docker:host-1/service/svc-123",
|
|
},
|
|
{
|
|
name: "without host ID uses service prefix only",
|
|
hostID: "",
|
|
serviceID: "svc-123",
|
|
serviceName: "my-service",
|
|
expected: "docker-service:svc-123",
|
|
},
|
|
{
|
|
name: "whitespace host ID treated as empty",
|
|
hostID: " ",
|
|
serviceID: "svc-123",
|
|
serviceName: "my-service",
|
|
expected: "docker-service:svc-123",
|
|
},
|
|
{
|
|
name: "derives ID from service name when ID empty",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "My Service",
|
|
expected: "docker:host-1/service/my-service",
|
|
},
|
|
{
|
|
name: "special chars in name replaced with dash",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "my/service:v1.0",
|
|
expected: "docker:host-1/service/my-service-v1-0",
|
|
},
|
|
{
|
|
name: "backslash and colon replaced",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "path\\to:service",
|
|
expected: "docker:host-1/service/path-to-service",
|
|
},
|
|
{
|
|
name: "preserves alphanumeric and underscore",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "my_service_123",
|
|
expected: "docker:host-1/service/my_service_123",
|
|
},
|
|
{
|
|
name: "preserves hyphens",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "my-service-name",
|
|
expected: "docker:host-1/service/my-service-name",
|
|
},
|
|
{
|
|
name: "trims leading/trailing dashes and underscores",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "---my-service___",
|
|
expected: "docker:host-1/service/my-service",
|
|
},
|
|
{
|
|
name: "truncates long derived ID to 32 chars",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "this-is-a-very-long-service-name-that-exceeds-the-limit",
|
|
expected: "docker:host-1/service/this-is-a-very-long-service-name",
|
|
},
|
|
{
|
|
name: "uses 'service' when name is all special chars",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "!!!@@@###",
|
|
expected: "docker:host-1/service/service",
|
|
},
|
|
{
|
|
name: "uses 'service' when both ID and name empty",
|
|
hostID: "host-1",
|
|
serviceID: "",
|
|
serviceName: "",
|
|
expected: "docker:host-1/service/service",
|
|
},
|
|
{
|
|
name: "uses 'service' when both ID and name whitespace",
|
|
hostID: "host-1",
|
|
serviceID: " ",
|
|
serviceName: " ",
|
|
expected: "docker:host-1/service/service",
|
|
},
|
|
{
|
|
name: "no host and derived name",
|
|
hostID: "",
|
|
serviceID: "",
|
|
serviceName: "webserver",
|
|
expected: "docker-service:webserver",
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// t.Parallel()
|
|
result := dockerServiceResourceID(tt.hostID, tt.serviceID, tt.serviceName)
|
|
if result != tt.expected {
|
|
t.Errorf("dockerServiceResourceID(%q, %q, %q) = %q, want %q",
|
|
tt.hostID, tt.serviceID, tt.serviceName, result, tt.expected)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestClearStorageOfflineAlert(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears existing offline alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "storage-1",
|
|
Name: "local-lvm",
|
|
Node: "pve1",
|
|
}
|
|
|
|
alertID := fmt.Sprintf("storage-offline-%s", storage.ID)
|
|
|
|
// Create an existing offline alert
|
|
m.mu.Lock()
|
|
m.activeAlerts[alertID] = &Alert{
|
|
ID: alertID,
|
|
Type: "storage-offline",
|
|
Level: "critical",
|
|
StartTime: time.Now().Add(-10 * time.Minute),
|
|
}
|
|
m.offlineConfirmations[storage.ID] = 3
|
|
m.mu.Unlock()
|
|
|
|
resolvedCh := make(chan string, 1)
|
|
m.SetResolvedCallback(func(id string) {
|
|
resolvedCh <- id
|
|
})
|
|
|
|
m.clearStorageOfflineAlert(storage)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts[alertID]
|
|
_, confirmExists := m.offlineConfirmations[storage.ID]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared")
|
|
}
|
|
if confirmExists {
|
|
t.Error("expected offline confirmation to be cleared")
|
|
}
|
|
select {
|
|
case resolvedID := <-resolvedCh:
|
|
if resolvedID != alertID {
|
|
t.Errorf("expected resolved callback with %q, got %q", alertID, resolvedID)
|
|
}
|
|
case <-time.After(2 * time.Second):
|
|
t.Error("expected resolved callback to be called")
|
|
}
|
|
})
|
|
|
|
t.Run("noop when no alert exists", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "storage-2",
|
|
Name: "local-zfs",
|
|
Node: "pve1",
|
|
}
|
|
|
|
var callbackCalled bool
|
|
m.SetResolvedCallback(func(id string) {
|
|
callbackCalled = true
|
|
})
|
|
|
|
m.clearStorageOfflineAlert(storage)
|
|
|
|
if callbackCalled {
|
|
t.Error("expected no callback when no alert exists")
|
|
}
|
|
})
|
|
|
|
t.Run("clears offline confirmation even when no alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "storage-3",
|
|
Name: "ceph-pool",
|
|
Node: "pve2",
|
|
}
|
|
|
|
// Set confirmation without alert
|
|
m.mu.Lock()
|
|
m.offlineConfirmations[storage.ID] = 2
|
|
m.mu.Unlock()
|
|
|
|
m.clearStorageOfflineAlert(storage)
|
|
|
|
m.mu.RLock()
|
|
_, confirmExists := m.offlineConfirmations[storage.ID]
|
|
m.mu.RUnlock()
|
|
|
|
if confirmExists {
|
|
t.Error("expected offline confirmation to be cleared")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestClearHostMetricAlerts(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears specified metrics", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
hostID := "my-host"
|
|
resourceID := fmt.Sprintf("host:%s", hostID)
|
|
|
|
// Create alerts for cpu and memory
|
|
m.mu.Lock()
|
|
m.activeAlerts[fmt.Sprintf("%s-cpu", resourceID)] = &Alert{ID: fmt.Sprintf("%s-cpu", resourceID)}
|
|
m.activeAlerts[fmt.Sprintf("%s-memory", resourceID)] = &Alert{ID: fmt.Sprintf("%s-memory", resourceID)}
|
|
m.activeAlerts[fmt.Sprintf("%s-disk", resourceID)] = &Alert{ID: fmt.Sprintf("%s-disk", resourceID)}
|
|
m.mu.Unlock()
|
|
|
|
m.clearHostMetricAlerts(hostID, "cpu", "disk")
|
|
|
|
m.mu.RLock()
|
|
_, cpuExists := m.activeAlerts[fmt.Sprintf("%s-cpu", resourceID)]
|
|
_, memExists := m.activeAlerts[fmt.Sprintf("%s-memory", resourceID)]
|
|
_, diskExists := m.activeAlerts[fmt.Sprintf("%s-disk", resourceID)]
|
|
m.mu.RUnlock()
|
|
|
|
if cpuExists {
|
|
t.Error("expected cpu alert to be cleared")
|
|
}
|
|
if !memExists {
|
|
t.Error("expected memory alert to remain (not specified)")
|
|
}
|
|
if diskExists {
|
|
t.Error("expected disk alert to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("defaults to cpu and memory when no metrics specified", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
hostID := "default-host"
|
|
resourceID := fmt.Sprintf("host:%s", hostID)
|
|
|
|
// Create alerts
|
|
m.mu.Lock()
|
|
m.activeAlerts[fmt.Sprintf("%s-cpu", resourceID)] = &Alert{ID: fmt.Sprintf("%s-cpu", resourceID)}
|
|
m.activeAlerts[fmt.Sprintf("%s-memory", resourceID)] = &Alert{ID: fmt.Sprintf("%s-memory", resourceID)}
|
|
m.activeAlerts[fmt.Sprintf("%s-disk", resourceID)] = &Alert{ID: fmt.Sprintf("%s-disk", resourceID)}
|
|
m.mu.Unlock()
|
|
|
|
m.clearHostMetricAlerts(hostID) // No metrics specified
|
|
|
|
m.mu.RLock()
|
|
_, cpuExists := m.activeAlerts[fmt.Sprintf("%s-cpu", resourceID)]
|
|
_, memExists := m.activeAlerts[fmt.Sprintf("%s-memory", resourceID)]
|
|
_, diskExists := m.activeAlerts[fmt.Sprintf("%s-disk", resourceID)]
|
|
m.mu.RUnlock()
|
|
|
|
if cpuExists {
|
|
t.Error("expected cpu alert to be cleared (default)")
|
|
}
|
|
if memExists {
|
|
t.Error("expected memory alert to be cleared (default)")
|
|
}
|
|
if !diskExists {
|
|
t.Error("expected disk alert to remain (not in defaults)")
|
|
}
|
|
})
|
|
|
|
t.Run("empty hostID is noop", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Create an alert that should not be touched
|
|
m.mu.Lock()
|
|
m.activeAlerts["host:unknown-cpu"] = &Alert{ID: "host:unknown-cpu"}
|
|
m.mu.Unlock()
|
|
|
|
m.clearHostMetricAlerts("", "cpu")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["host:unknown-cpu"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Error("expected alert to remain when hostID is empty")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestClearHostDiskAlerts(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears all disk alerts for host", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
hostID := "disk-host"
|
|
resourceID := fmt.Sprintf("host:%s", hostID)
|
|
|
|
// Create disk alerts with the expected ResourceID format
|
|
m.mu.Lock()
|
|
m.activeAlerts["disk1-alert"] = &Alert{
|
|
ID: "disk1-alert",
|
|
ResourceID: fmt.Sprintf("%s/disk:sda", resourceID),
|
|
}
|
|
m.activeAlerts["disk2-alert"] = &Alert{
|
|
ID: "disk2-alert",
|
|
ResourceID: fmt.Sprintf("%s/disk:sdb", resourceID),
|
|
}
|
|
m.activeAlerts["cpu-alert"] = &Alert{
|
|
ID: "cpu-alert",
|
|
ResourceID: fmt.Sprintf("%s-cpu", resourceID),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.clearHostDiskAlerts(hostID)
|
|
|
|
m.mu.RLock()
|
|
_, disk1Exists := m.activeAlerts["disk1-alert"]
|
|
_, disk2Exists := m.activeAlerts["disk2-alert"]
|
|
_, cpuExists := m.activeAlerts["cpu-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if disk1Exists {
|
|
t.Error("expected disk1 alert to be cleared")
|
|
}
|
|
if disk2Exists {
|
|
t.Error("expected disk2 alert to be cleared")
|
|
}
|
|
if !cpuExists {
|
|
t.Error("expected cpu alert to remain (not a disk alert)")
|
|
}
|
|
})
|
|
|
|
t.Run("empty hostID is noop", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Create an alert
|
|
m.mu.Lock()
|
|
m.activeAlerts["disk-alert"] = &Alert{
|
|
ID: "disk-alert",
|
|
ResourceID: "host:unknown/disk:sda",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.clearHostDiskAlerts("")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["disk-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Error("expected alert to remain when hostID is empty")
|
|
}
|
|
})
|
|
|
|
t.Run("skips nil alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
hostID := "nil-test"
|
|
resourceID := fmt.Sprintf("host:%s", hostID)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["nil-alert"] = nil
|
|
m.activeAlerts["real-alert"] = &Alert{
|
|
ID: "real-alert",
|
|
ResourceID: fmt.Sprintf("%s/disk:sda", resourceID),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// Should not panic
|
|
m.clearHostDiskAlerts(hostID)
|
|
|
|
m.mu.RLock()
|
|
_, realExists := m.activeAlerts["real-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if realExists {
|
|
t.Error("expected real alert to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("noop when no matching alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["other-alert"] = &Alert{
|
|
ID: "other-alert",
|
|
ResourceID: "host:other-host/disk:sda",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.clearHostDiskAlerts("my-host")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["other-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Error("expected other host's alert to remain")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCleanupHostDiskAlerts(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears alerts not in seen set", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.Host{ID: "host-1"}
|
|
resourceID := fmt.Sprintf("host:%s", host.ID)
|
|
|
|
// Create disk alerts
|
|
m.mu.Lock()
|
|
m.activeAlerts["disk-sda"] = &Alert{
|
|
ID: "disk-sda",
|
|
ResourceID: fmt.Sprintf("%s/disk:sda", resourceID),
|
|
}
|
|
m.activeAlerts["disk-sdb"] = &Alert{
|
|
ID: "disk-sdb",
|
|
ResourceID: fmt.Sprintf("%s/disk:sdb", resourceID),
|
|
}
|
|
m.activeAlerts["disk-sdc"] = &Alert{
|
|
ID: "disk-sdc",
|
|
ResourceID: fmt.Sprintf("%s/disk:sdc", resourceID),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// Only sda and sdb are in the seen set
|
|
seen := map[string]struct{}{
|
|
fmt.Sprintf("%s/disk:sda", resourceID): {},
|
|
fmt.Sprintf("%s/disk:sdb", resourceID): {},
|
|
}
|
|
|
|
m.cleanupHostDiskAlerts(host, seen)
|
|
|
|
m.mu.RLock()
|
|
_, sdaExists := m.activeAlerts["disk-sda"]
|
|
_, sdbExists := m.activeAlerts["disk-sdb"]
|
|
_, sdcExists := m.activeAlerts["disk-sdc"]
|
|
m.mu.RUnlock()
|
|
|
|
if !sdaExists {
|
|
t.Error("expected sda alert to remain (in seen set)")
|
|
}
|
|
if !sdbExists {
|
|
t.Error("expected sdb alert to remain (in seen set)")
|
|
}
|
|
if sdcExists {
|
|
t.Error("expected sdc alert to be cleared (not in seen set)")
|
|
}
|
|
})
|
|
|
|
t.Run("empty host ID is noop", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["disk-alert"] = &Alert{
|
|
ID: "disk-alert",
|
|
ResourceID: "host:unknown/disk:sda",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
host := models.Host{ID: ""}
|
|
m.cleanupHostDiskAlerts(host, nil)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["disk-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Error("expected alert to remain when host ID is empty")
|
|
}
|
|
})
|
|
|
|
t.Run("skips nil alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.Host{ID: "host-2"}
|
|
resourceID := fmt.Sprintf("host:%s", host.ID)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["nil-alert"] = nil
|
|
m.activeAlerts["real-alert"] = &Alert{
|
|
ID: "real-alert",
|
|
ResourceID: fmt.Sprintf("%s/disk:sda", resourceID),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
seen := map[string]struct{}{} // Empty seen set
|
|
|
|
// Should not panic
|
|
m.cleanupHostDiskAlerts(host, seen)
|
|
|
|
m.mu.RLock()
|
|
_, realExists := m.activeAlerts["real-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if realExists {
|
|
t.Error("expected real alert to be cleared (not in seen set)")
|
|
}
|
|
})
|
|
|
|
t.Run("skips non-matching prefix", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.Host{ID: "host-3"}
|
|
resourceID := fmt.Sprintf("host:%s", host.ID)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["cpu-alert"] = &Alert{
|
|
ID: "cpu-alert",
|
|
ResourceID: fmt.Sprintf("%s-cpu", resourceID), // Not a disk alert
|
|
}
|
|
m.activeAlerts["disk-alert"] = &Alert{
|
|
ID: "disk-alert",
|
|
ResourceID: fmt.Sprintf("%s/disk:sda", resourceID),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
seen := map[string]struct{}{} // Empty seen set
|
|
|
|
m.cleanupHostDiskAlerts(host, seen)
|
|
|
|
m.mu.RLock()
|
|
_, cpuExists := m.activeAlerts["cpu-alert"]
|
|
_, diskExists := m.activeAlerts["disk-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !cpuExists {
|
|
t.Error("expected cpu alert to remain (not a disk alert)")
|
|
}
|
|
if diskExists {
|
|
t.Error("expected disk alert to be cleared")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestHandleDockerHostRemovedEmptyID(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Create some alerts that should not be touched
|
|
m.mu.Lock()
|
|
m.activeAlerts["docker-host-offline-host1"] = &Alert{ID: "docker-host-offline-host1"}
|
|
m.dockerOfflineCount["host1"] = 3
|
|
m.mu.Unlock()
|
|
|
|
// Call with empty ID - should be noop
|
|
host := models.DockerHost{ID: ""}
|
|
m.HandleDockerHostRemoved(host)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["docker-host-offline-host1"]
|
|
_, countExists := m.dockerOfflineCount["host1"]
|
|
m.mu.RUnlock()
|
|
|
|
if !alertExists {
|
|
t.Error("expected alert to remain when host ID is empty")
|
|
}
|
|
if !countExists {
|
|
t.Error("expected offline count to remain when host ID is empty")
|
|
}
|
|
}
|
|
|
|
func TestHandleDockerHostOnline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears offline alert and tracking", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{ID: "docker-host-1", DisplayName: "My Host"}
|
|
alertID := fmt.Sprintf("docker-host-offline-%s", host.ID)
|
|
|
|
// Set up offline alert and tracking
|
|
m.mu.Lock()
|
|
m.activeAlerts[alertID] = &Alert{ID: alertID, ResourceID: fmt.Sprintf("docker:%s", host.ID)}
|
|
m.dockerOfflineCount[host.ID] = 5
|
|
m.mu.Unlock()
|
|
|
|
m.HandleDockerHostOnline(host)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts[alertID]
|
|
_, countExists := m.dockerOfflineCount[host.ID]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected offline alert to be cleared")
|
|
}
|
|
if countExists {
|
|
t.Error("expected offline count to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("noop when no offline alert exists", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{ID: "docker-host-2"}
|
|
|
|
// Set up only tracking, no alert
|
|
m.mu.Lock()
|
|
m.dockerOfflineCount[host.ID] = 2
|
|
m.mu.Unlock()
|
|
|
|
m.HandleDockerHostOnline(host)
|
|
|
|
m.mu.RLock()
|
|
_, countExists := m.dockerOfflineCount[host.ID]
|
|
m.mu.RUnlock()
|
|
|
|
if countExists {
|
|
t.Error("expected offline count to be cleared even without alert")
|
|
}
|
|
})
|
|
|
|
t.Run("empty host ID is noop", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Create some data that should not be touched
|
|
m.mu.Lock()
|
|
m.activeAlerts["docker-host-offline-other"] = &Alert{ID: "docker-host-offline-other"}
|
|
m.dockerOfflineCount["other"] = 3
|
|
m.mu.Unlock()
|
|
|
|
host := models.DockerHost{ID: ""}
|
|
m.HandleDockerHostOnline(host)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["docker-host-offline-other"]
|
|
_, countExists := m.dockerOfflineCount["other"]
|
|
m.mu.RUnlock()
|
|
|
|
if !alertExists {
|
|
t.Error("expected other alert to remain when host ID is empty")
|
|
}
|
|
if !countExists {
|
|
t.Error("expected other count to remain when host ID is empty")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCleanupDockerContainerAlerts(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears alerts not in seen set", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{ID: "docker-host-1"}
|
|
prefix := fmt.Sprintf("docker:%s/", host.ID)
|
|
|
|
// Create container alerts
|
|
m.mu.Lock()
|
|
m.activeAlerts["container1-alert"] = &Alert{
|
|
ID: "container1-alert",
|
|
ResourceID: prefix + "container1",
|
|
}
|
|
m.activeAlerts["container2-alert"] = &Alert{
|
|
ID: "container2-alert",
|
|
ResourceID: prefix + "container2",
|
|
}
|
|
m.activeAlerts["container3-alert"] = &Alert{
|
|
ID: "container3-alert",
|
|
ResourceID: prefix + "container3",
|
|
}
|
|
m.dockerStateConfirm[prefix+"container1"] = 2
|
|
m.dockerStateConfirm[prefix+"container2"] = 1
|
|
m.dockerStateConfirm[prefix+"container3"] = 3
|
|
m.mu.Unlock()
|
|
|
|
// Only container1 and container2 are in seen set
|
|
seen := map[string]struct{}{
|
|
prefix + "container1": {},
|
|
prefix + "container2": {},
|
|
}
|
|
|
|
m.cleanupDockerContainerAlerts(host, seen)
|
|
|
|
m.mu.RLock()
|
|
_, c1Exists := m.activeAlerts["container1-alert"]
|
|
_, c2Exists := m.activeAlerts["container2-alert"]
|
|
_, c3Exists := m.activeAlerts["container3-alert"]
|
|
_, s1Exists := m.dockerStateConfirm[prefix+"container1"]
|
|
_, s2Exists := m.dockerStateConfirm[prefix+"container2"]
|
|
_, s3Exists := m.dockerStateConfirm[prefix+"container3"]
|
|
m.mu.RUnlock()
|
|
|
|
if !c1Exists {
|
|
t.Error("expected container1 alert to remain (in seen set)")
|
|
}
|
|
if !c2Exists {
|
|
t.Error("expected container2 alert to remain (in seen set)")
|
|
}
|
|
if c3Exists {
|
|
t.Error("expected container3 alert to be cleared (not in seen set)")
|
|
}
|
|
if !s1Exists {
|
|
t.Error("expected container1 state confirm to remain (in seen set)")
|
|
}
|
|
if !s2Exists {
|
|
t.Error("expected container2 state confirm to remain (in seen set)")
|
|
}
|
|
if s3Exists {
|
|
t.Error("expected container3 state confirm to be cleared (not in seen set)")
|
|
}
|
|
})
|
|
|
|
t.Run("skips alerts from other hosts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{ID: "host-a"}
|
|
|
|
// Create alert for a different host
|
|
m.mu.Lock()
|
|
m.activeAlerts["other-host-alert"] = &Alert{
|
|
ID: "other-host-alert",
|
|
ResourceID: "docker:host-b/container1",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
seen := map[string]struct{}{} // Empty seen set
|
|
|
|
m.cleanupDockerContainerAlerts(host, seen)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["other-host-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Error("expected other host's alert to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("handles empty seen set", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{ID: "host-c"}
|
|
prefix := fmt.Sprintf("docker:%s/", host.ID)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["to-clear"] = &Alert{
|
|
ID: "to-clear",
|
|
ResourceID: prefix + "container1",
|
|
}
|
|
m.dockerStateConfirm[prefix+"container1"] = 1
|
|
m.mu.Unlock()
|
|
|
|
m.cleanupDockerContainerAlerts(host, map[string]struct{}{})
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["to-clear"]
|
|
_, stateExists := m.dockerStateConfirm[prefix+"container1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared with empty seen set")
|
|
}
|
|
if stateExists {
|
|
t.Error("expected state confirm to be cleared with empty seen set")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestSafeCallEscalateCallback(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("calls callback with alert and level", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
var receivedAlert *Alert
|
|
var receivedLevel int
|
|
done := make(chan struct{})
|
|
|
|
m.SetEscalateCallback(func(alert *Alert, level int) {
|
|
receivedAlert = alert
|
|
receivedLevel = level
|
|
close(done)
|
|
})
|
|
|
|
alert := &Alert{
|
|
ID: "test-alert",
|
|
Type: "test",
|
|
ResourceName: "resource-1",
|
|
}
|
|
|
|
m.safeCallEscalateCallback(alert, 2)
|
|
|
|
select {
|
|
case <-done:
|
|
if receivedAlert == nil {
|
|
t.Fatal("expected alert to be received")
|
|
}
|
|
if receivedAlert.ID != "test-alert" {
|
|
t.Errorf("expected alert ID 'test-alert', got %q", receivedAlert.ID)
|
|
}
|
|
if receivedLevel != 2 {
|
|
t.Errorf("expected level 2, got %d", receivedLevel)
|
|
}
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("callback not called within timeout")
|
|
}
|
|
})
|
|
|
|
t.Run("noop when callback is nil", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
// No callback set
|
|
|
|
alert := &Alert{ID: "test-alert"}
|
|
|
|
// Should not panic
|
|
m.safeCallEscalateCallback(alert, 1)
|
|
})
|
|
|
|
t.Run("recovers from panic in callback", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
done := make(chan struct{})
|
|
m.SetEscalateCallback(func(alert *Alert, level int) {
|
|
defer close(done)
|
|
panic("test panic")
|
|
})
|
|
|
|
alert := &Alert{ID: "panic-test"}
|
|
|
|
// Should not panic the caller
|
|
m.safeCallEscalateCallback(alert, 1)
|
|
|
|
select {
|
|
case <-done:
|
|
// Callback ran (and panicked, but recovered)
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("callback not called within timeout")
|
|
}
|
|
})
|
|
|
|
t.Run("clones alert to prevent modification", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
var receivedAlert *Alert
|
|
done := make(chan struct{})
|
|
|
|
m.SetEscalateCallback(func(alert *Alert, level int) {
|
|
receivedAlert = alert
|
|
close(done)
|
|
})
|
|
|
|
original := &Alert{
|
|
ID: "original-alert",
|
|
ResourceName: "original-resource",
|
|
}
|
|
|
|
m.safeCallEscalateCallback(original, 1)
|
|
|
|
select {
|
|
case <-done:
|
|
// Modify original after callback started
|
|
original.ResourceName = "modified"
|
|
|
|
// Received alert should be a clone, not affected by modification
|
|
if receivedAlert.ID != "original-alert" {
|
|
t.Errorf("expected cloned alert ID")
|
|
}
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("callback not called within timeout")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestSafeCallResolvedCallback(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("calls callback with alert ID synchronously", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
var receivedID string
|
|
m.SetResolvedCallback(func(alertID string) {
|
|
receivedID = alertID
|
|
})
|
|
|
|
m.safeCallResolvedCallback("test-alert-123", false)
|
|
|
|
if receivedID != "test-alert-123" {
|
|
t.Errorf("expected alert ID 'test-alert-123', got %q", receivedID)
|
|
}
|
|
})
|
|
|
|
t.Run("calls callback asynchronously", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
var receivedID string
|
|
done := make(chan struct{})
|
|
|
|
m.SetResolvedCallback(func(alertID string) {
|
|
receivedID = alertID
|
|
close(done)
|
|
})
|
|
|
|
m.safeCallResolvedCallback("async-alert", true)
|
|
|
|
select {
|
|
case <-done:
|
|
if receivedID != "async-alert" {
|
|
t.Errorf("expected alert ID 'async-alert', got %q", receivedID)
|
|
}
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("async callback not called within timeout")
|
|
}
|
|
})
|
|
|
|
t.Run("noop when callback is nil", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
// No callback set
|
|
|
|
// Should not panic
|
|
m.safeCallResolvedCallback("test-alert", false)
|
|
m.safeCallResolvedCallback("test-alert", true)
|
|
})
|
|
|
|
t.Run("recovers from panic in sync callback", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.SetResolvedCallback(func(alertID string) {
|
|
panic("test panic")
|
|
})
|
|
|
|
// Should not panic the caller
|
|
m.safeCallResolvedCallback("panic-test", false)
|
|
})
|
|
|
|
t.Run("recovers from panic in async callback", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
done := make(chan struct{})
|
|
m.SetResolvedCallback(func(alertID string) {
|
|
defer close(done)
|
|
panic("async panic")
|
|
})
|
|
|
|
m.safeCallResolvedCallback("async-panic", true)
|
|
|
|
select {
|
|
case <-done:
|
|
// Callback ran (and panicked, but recovered)
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("async callback not called within timeout")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestHandleHostOnline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears offline alert and confirmation tracking", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.Host{ID: "host-1", Hostname: "my-host"}
|
|
alertID := fmt.Sprintf("host-offline-%s", host.ID)
|
|
resourceKey := fmt.Sprintf("host:%s", host.ID)
|
|
|
|
// Set up offline alert and tracking
|
|
m.mu.Lock()
|
|
m.activeAlerts[alertID] = &Alert{ID: alertID, ResourceID: resourceKey}
|
|
m.offlineConfirmations[resourceKey] = 5
|
|
m.mu.Unlock()
|
|
|
|
m.HandleHostOnline(host)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts[alertID]
|
|
_, confirmExists := m.offlineConfirmations[resourceKey]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected offline alert to be cleared")
|
|
}
|
|
if confirmExists {
|
|
t.Error("expected offline confirmation to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("clears confirmation even without alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
host := models.Host{ID: "host-2"}
|
|
resourceKey := fmt.Sprintf("host:%s", host.ID)
|
|
|
|
// Only tracking, no alert
|
|
m.mu.Lock()
|
|
m.offlineConfirmations[resourceKey] = 2
|
|
m.mu.Unlock()
|
|
|
|
m.HandleHostOnline(host)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.offlineConfirmations[resourceKey]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected offline confirmation to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("empty host ID is noop", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Create data that should not be touched
|
|
m.mu.Lock()
|
|
m.activeAlerts["host-offline-other"] = &Alert{ID: "host-offline-other"}
|
|
m.offlineConfirmations["host:other"] = 3
|
|
m.mu.Unlock()
|
|
|
|
host := models.Host{ID: ""}
|
|
m.HandleHostOnline(host)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["host-offline-other"]
|
|
_, confirmExists := m.offlineConfirmations["host:other"]
|
|
m.mu.RUnlock()
|
|
|
|
if !alertExists {
|
|
t.Error("expected other alert to remain when host ID is empty")
|
|
}
|
|
if !confirmExists {
|
|
t.Error("expected other confirmation to remain when host ID is empty")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestAcknowledgeAlertNotFound(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
err := m.AcknowledgeAlert("nonexistent-alert", "user1")
|
|
|
|
if err == nil {
|
|
t.Fatal("expected error when acknowledging nonexistent alert")
|
|
}
|
|
if !strings.Contains(err.Error(), "alert not found") {
|
|
t.Errorf("expected 'alert not found' error, got: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestUnacknowledgeAlertNotFound(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
err := m.UnacknowledgeAlert("nonexistent-alert")
|
|
|
|
if err == nil {
|
|
t.Fatal("expected error when unacknowledging nonexistent alert")
|
|
}
|
|
if !strings.Contains(err.Error(), "alert not found") {
|
|
t.Errorf("expected 'alert not found' error, got: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestUnacknowledgeAlertSuccess(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Create and acknowledge an alert first
|
|
alertID := "test-alert-123"
|
|
now := time.Now()
|
|
m.activeAlerts[alertID] = &Alert{
|
|
ID: alertID,
|
|
Acknowledged: true,
|
|
AckTime: &now,
|
|
AckUser: "user1",
|
|
}
|
|
m.ackState[alertID] = ackRecord{acknowledged: true, user: "user1", time: now}
|
|
|
|
// Unacknowledge the alert
|
|
err := m.UnacknowledgeAlert(alertID)
|
|
|
|
if err != nil {
|
|
t.Fatalf("unexpected error unacknowledging alert: %v", err)
|
|
}
|
|
|
|
// Verify alert state was updated
|
|
alert := m.activeAlerts[alertID]
|
|
if alert.Acknowledged {
|
|
t.Error("expected Acknowledged to be false")
|
|
}
|
|
if alert.AckTime != nil {
|
|
t.Error("expected AckTime to be nil")
|
|
}
|
|
if alert.AckUser != "" {
|
|
t.Errorf("expected AckUser to be empty, got: %s", alert.AckUser)
|
|
}
|
|
|
|
// Verify ackState was removed
|
|
if _, exists := m.ackState[alertID]; exists {
|
|
t.Error("expected ackState entry to be deleted")
|
|
}
|
|
}
|
|
|
|
func TestClearActiveAlertsEmptyMaps(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Ensure maps are empty initially
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Fatalf("expected activeAlerts to be empty, got %d", len(m.activeAlerts))
|
|
}
|
|
if len(m.pendingAlerts) != 0 {
|
|
t.Fatalf("expected pendingAlerts to be empty, got %d", len(m.pendingAlerts))
|
|
}
|
|
|
|
// Call ClearActiveAlerts on empty manager - should return early without panic
|
|
m.ClearActiveAlerts()
|
|
|
|
// Verify maps are still empty (function returned early)
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected activeAlerts to remain empty, got %d", len(m.activeAlerts))
|
|
}
|
|
}
|
|
|
|
func TestClearActiveAlertsWithExistingAlerts(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Populate various maps with test data
|
|
m.mu.Lock()
|
|
m.activeAlerts["test-alert-1"] = &Alert{ID: "test-alert-1", Type: "cpu-usage"}
|
|
m.activeAlerts["test-alert-2"] = &Alert{ID: "test-alert-2", Type: "memory-usage"}
|
|
m.pendingAlerts["pending-1"] = time.Now()
|
|
m.recentAlerts["recent-1"] = &Alert{ID: "recent-1", Type: "disk-usage"}
|
|
m.suppressedUntil["suppressed-1"] = time.Now().Add(time.Hour)
|
|
m.alertRateLimit["rate-1"] = []time.Time{time.Now()}
|
|
m.nodeOfflineCount["node-1"] = 3
|
|
m.offlineConfirmations["node-1"] = 2
|
|
m.dockerOfflineCount["docker-1"] = 1
|
|
m.dockerStateConfirm["docker-1"] = 1
|
|
m.ackState["test-alert-1"] = ackRecord{acknowledged: true, user: "testuser", time: time.Now()}
|
|
m.mu.Unlock()
|
|
|
|
m.resolvedMutex.Lock()
|
|
m.recentlyResolved["resolved-1"] = &ResolvedAlert{Alert: &Alert{ID: "resolved-1"}, ResolvedTime: time.Now()}
|
|
m.resolvedMutex.Unlock()
|
|
|
|
// Call ClearActiveAlerts
|
|
m.ClearActiveAlerts()
|
|
|
|
// Give goroutine time to run SaveActiveAlerts
|
|
time.Sleep(50 * time.Millisecond)
|
|
|
|
// Verify all maps are cleared
|
|
m.mu.RLock()
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected activeAlerts to be empty, got %d", len(m.activeAlerts))
|
|
}
|
|
if len(m.pendingAlerts) != 0 {
|
|
t.Errorf("expected pendingAlerts to be empty, got %d", len(m.pendingAlerts))
|
|
}
|
|
if len(m.recentAlerts) != 0 {
|
|
t.Errorf("expected recentAlerts to be empty, got %d", len(m.recentAlerts))
|
|
}
|
|
if len(m.suppressedUntil) != 0 {
|
|
t.Errorf("expected suppressedUntil to be empty, got %d", len(m.suppressedUntil))
|
|
}
|
|
if len(m.alertRateLimit) != 0 {
|
|
t.Errorf("expected alertRateLimit to be empty, got %d", len(m.alertRateLimit))
|
|
}
|
|
if len(m.nodeOfflineCount) != 0 {
|
|
t.Errorf("expected nodeOfflineCount to be empty, got %d", len(m.nodeOfflineCount))
|
|
}
|
|
if len(m.offlineConfirmations) != 0 {
|
|
t.Errorf("expected offlineConfirmations to be empty, got %d", len(m.offlineConfirmations))
|
|
}
|
|
if len(m.dockerOfflineCount) != 0 {
|
|
t.Errorf("expected dockerOfflineCount to be empty, got %d", len(m.dockerOfflineCount))
|
|
}
|
|
if len(m.dockerStateConfirm) != 0 {
|
|
t.Errorf("expected dockerStateConfirm to be empty, got %d", len(m.dockerStateConfirm))
|
|
}
|
|
if len(m.ackState) != 0 {
|
|
t.Errorf("expected ackState to be empty, got %d", len(m.ackState))
|
|
}
|
|
m.mu.RUnlock()
|
|
|
|
m.resolvedMutex.RLock()
|
|
if len(m.recentlyResolved) != 0 {
|
|
t.Errorf("expected recentlyResolved to be empty, got %d", len(m.recentlyResolved))
|
|
}
|
|
m.resolvedMutex.RUnlock()
|
|
}
|
|
|
|
func TestClearBackupAlertsLocked(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears backup-age and backup-orphaned alerts only", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add a backup-age alert
|
|
m.activeAlerts["backup-alert-1"] = &Alert{
|
|
ID: "backup-alert-1",
|
|
Type: "backup-age",
|
|
}
|
|
// Add a non-backup alert
|
|
m.activeAlerts["cpu-alert-1"] = &Alert{
|
|
ID: "cpu-alert-1",
|
|
Type: "cpu",
|
|
}
|
|
// Add another backup-age alert
|
|
m.activeAlerts["backup-alert-2"] = &Alert{
|
|
ID: "backup-alert-2",
|
|
Type: "backup-age",
|
|
}
|
|
// Add a backup-orphaned alert
|
|
m.activeAlerts["backup-orphaned-1"] = &Alert{
|
|
ID: "backup-orphaned-1",
|
|
Type: "backup-orphaned",
|
|
}
|
|
|
|
if len(m.activeAlerts) != 4 {
|
|
t.Fatalf("expected 4 alerts, got %d", len(m.activeAlerts))
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.clearBackupAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Should have removed backup-age and backup-orphaned alerts, keeping cpu alert
|
|
if len(m.activeAlerts) != 1 {
|
|
t.Errorf("expected 1 alert remaining, got %d", len(m.activeAlerts))
|
|
}
|
|
if _, exists := m.activeAlerts["cpu-alert-1"]; !exists {
|
|
t.Error("expected cpu-alert-1 to remain")
|
|
}
|
|
if _, exists := m.activeAlerts["backup-alert-1"]; exists {
|
|
t.Error("expected backup-alert-1 to be cleared")
|
|
}
|
|
if _, exists := m.activeAlerts["backup-alert-2"]; exists {
|
|
t.Error("expected backup-alert-2 to be cleared")
|
|
}
|
|
if _, exists := m.activeAlerts["backup-orphaned-1"]; exists {
|
|
t.Error("expected backup-orphaned-1 to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("handles nil alert in map", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add a nil alert entry
|
|
m.activeAlerts["nil-alert"] = nil
|
|
// Add a valid backup-age alert
|
|
m.activeAlerts["backup-alert"] = &Alert{
|
|
ID: "backup-alert",
|
|
Type: "backup-age",
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.clearBackupAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Should have skipped nil and removed backup-age
|
|
if len(m.activeAlerts) != 1 {
|
|
t.Errorf("expected 1 alert remaining, got %d", len(m.activeAlerts))
|
|
}
|
|
// Nil entry should remain
|
|
if _, exists := m.activeAlerts["nil-alert"]; !exists {
|
|
t.Error("expected nil-alert entry to remain (nil check should skip it)")
|
|
}
|
|
})
|
|
|
|
t.Run("empty alerts map is no-op", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.clearBackupAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected 0 alerts, got %d", len(m.activeAlerts))
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestClearBackupAlerts(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add a backup-age alert
|
|
m.activeAlerts["backup-alert"] = &Alert{
|
|
ID: "backup-alert",
|
|
Type: "backup-age",
|
|
}
|
|
// Add a non-backup alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{
|
|
ID: "cpu-alert",
|
|
Type: "cpu",
|
|
}
|
|
|
|
// Call the public method (handles locking internally)
|
|
m.clearBackupAlerts()
|
|
|
|
// Only cpu alert should remain
|
|
if len(m.activeAlerts) != 1 {
|
|
t.Errorf("expected 1 alert remaining, got %d", len(m.activeAlerts))
|
|
}
|
|
if _, exists := m.activeAlerts["cpu-alert"]; !exists {
|
|
t.Error("expected cpu-alert to remain")
|
|
}
|
|
}
|
|
|
|
func TestClearSnapshotAlertsForInstanceLocked(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("clears snapshot alerts for specific instance", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add snapshot alerts for different instances
|
|
m.activeAlerts["snap-inst1"] = &Alert{
|
|
ID: "snap-inst1",
|
|
Type: "snapshot-age",
|
|
Instance: "instance1",
|
|
}
|
|
m.activeAlerts["snap-inst2"] = &Alert{
|
|
ID: "snap-inst2",
|
|
Type: "snapshot-age",
|
|
Instance: "instance2",
|
|
}
|
|
// Add a non-snapshot alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{
|
|
ID: "cpu-alert",
|
|
Type: "cpu",
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.clearSnapshotAlertsForInstanceLocked("instance1")
|
|
m.mu.Unlock()
|
|
|
|
// Should keep instance2 snapshot and cpu alert
|
|
if len(m.activeAlerts) != 2 {
|
|
t.Errorf("expected 2 alerts remaining, got %d", len(m.activeAlerts))
|
|
}
|
|
if _, exists := m.activeAlerts["snap-inst1"]; exists {
|
|
t.Error("expected snap-inst1 to be cleared")
|
|
}
|
|
if _, exists := m.activeAlerts["snap-inst2"]; !exists {
|
|
t.Error("expected snap-inst2 to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("clears all snapshot alerts when instance is empty", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add snapshot alerts for different instances
|
|
m.activeAlerts["snap-inst1"] = &Alert{
|
|
ID: "snap-inst1",
|
|
Type: "snapshot-age",
|
|
Instance: "instance1",
|
|
}
|
|
m.activeAlerts["snap-inst2"] = &Alert{
|
|
ID: "snap-inst2",
|
|
Type: "snapshot-age",
|
|
Instance: "instance2",
|
|
}
|
|
// Add a non-snapshot alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{
|
|
ID: "cpu-alert",
|
|
Type: "cpu",
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.clearSnapshotAlertsForInstanceLocked("")
|
|
m.mu.Unlock()
|
|
|
|
// Should keep only cpu alert
|
|
if len(m.activeAlerts) != 1 {
|
|
t.Errorf("expected 1 alert remaining, got %d", len(m.activeAlerts))
|
|
}
|
|
if _, exists := m.activeAlerts["cpu-alert"]; !exists {
|
|
t.Error("expected cpu-alert to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("handles nil alert in map", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add nil entry and valid snapshot alert
|
|
m.activeAlerts["nil-alert"] = nil
|
|
m.activeAlerts["snap-alert"] = &Alert{
|
|
ID: "snap-alert",
|
|
Type: "snapshot-age",
|
|
Instance: "inst1",
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.clearSnapshotAlertsForInstanceLocked("inst1")
|
|
m.mu.Unlock()
|
|
|
|
// Nil entry should remain, snapshot should be cleared
|
|
if len(m.activeAlerts) != 1 {
|
|
t.Errorf("expected 1 alert remaining, got %d", len(m.activeAlerts))
|
|
}
|
|
if _, exists := m.activeAlerts["nil-alert"]; !exists {
|
|
t.Error("expected nil-alert entry to remain")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestClearSnapshotAlertsForInstance(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add a snapshot alert
|
|
m.activeAlerts["snap-alert"] = &Alert{
|
|
ID: "snap-alert",
|
|
Type: "snapshot-age",
|
|
Instance: "instance1",
|
|
}
|
|
|
|
// Call the public method (handles locking internally)
|
|
m.clearSnapshotAlertsForInstance("instance1")
|
|
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected 0 alerts remaining, got %d", len(m.activeAlerts))
|
|
}
|
|
}
|
|
|
|
func TestApplyGlobalOfflineSettingsLocked(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("DisableAllNodesOffline clears node offline alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add node offline alerts
|
|
m.activeAlerts["node-offline-node1"] = &Alert{ID: "node-offline-node1", Type: "offline"}
|
|
m.activeAlerts["node-offline-node2"] = &Alert{ID: "node-offline-node2", Type: "offline"}
|
|
// Add non-node alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{ID: "cpu-alert", Type: "cpu"}
|
|
// Add to nodeOfflineCount
|
|
m.nodeOfflineCount["node1"] = 3
|
|
m.nodeOfflineCount["node2"] = 2
|
|
|
|
m.config.DisableAllNodesOffline = true
|
|
|
|
m.mu.Lock()
|
|
m.applyGlobalOfflineSettingsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Node alerts should be cleared
|
|
if _, exists := m.activeAlerts["node-offline-node1"]; exists {
|
|
t.Error("expected node-offline-node1 to be cleared")
|
|
}
|
|
if _, exists := m.activeAlerts["node-offline-node2"]; exists {
|
|
t.Error("expected node-offline-node2 to be cleared")
|
|
}
|
|
// Non-node alert should remain
|
|
if _, exists := m.activeAlerts["cpu-alert"]; !exists {
|
|
t.Error("expected cpu-alert to remain")
|
|
}
|
|
// nodeOfflineCount should be reset
|
|
if len(m.nodeOfflineCount) != 0 {
|
|
t.Errorf("expected nodeOfflineCount to be empty, got %d entries", len(m.nodeOfflineCount))
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllPBSOffline clears PBS offline alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add PBS offline alerts
|
|
m.activeAlerts["pbs-offline-pbs1"] = &Alert{ID: "pbs-offline-pbs1", ResourceID: "pbs1", Type: "offline"}
|
|
// Add non-PBS alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{ID: "cpu-alert", Type: "cpu"}
|
|
// Add to offlineConfirmations
|
|
m.offlineConfirmations["pbs1"] = 3
|
|
|
|
m.config.DisableAllPBSOffline = true
|
|
|
|
m.mu.Lock()
|
|
m.applyGlobalOfflineSettingsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// PBS alert should be cleared
|
|
if _, exists := m.activeAlerts["pbs-offline-pbs1"]; exists {
|
|
t.Error("expected pbs-offline-pbs1 to be cleared")
|
|
}
|
|
// Non-PBS alert should remain
|
|
if _, exists := m.activeAlerts["cpu-alert"]; !exists {
|
|
t.Error("expected cpu-alert to remain")
|
|
}
|
|
// offlineConfirmations for PBS should be removed
|
|
if _, exists := m.offlineConfirmations["pbs1"]; exists {
|
|
t.Error("expected offlineConfirmations for pbs1 to be removed")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllGuestsOffline clears guest powered off alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add guest powered off alerts
|
|
m.activeAlerts["guest-powered-off-vm1"] = &Alert{ID: "guest-powered-off-vm1", ResourceID: "vm1", Type: "powered-off"}
|
|
// Add non-guest alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{ID: "cpu-alert", Type: "cpu"}
|
|
// Add to offlineConfirmations
|
|
m.offlineConfirmations["vm1"] = 2
|
|
|
|
m.config.DisableAllGuestsOffline = true
|
|
|
|
m.mu.Lock()
|
|
m.applyGlobalOfflineSettingsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Guest alert should be cleared
|
|
if _, exists := m.activeAlerts["guest-powered-off-vm1"]; exists {
|
|
t.Error("expected guest-powered-off-vm1 to be cleared")
|
|
}
|
|
// Non-guest alert should remain
|
|
if _, exists := m.activeAlerts["cpu-alert"]; !exists {
|
|
t.Error("expected cpu-alert to remain")
|
|
}
|
|
// offlineConfirmations for guest should be removed
|
|
if _, exists := m.offlineConfirmations["vm1"]; exists {
|
|
t.Error("expected offlineConfirmations for vm1 to be removed")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllDockerHostsOffline clears docker host alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add docker host offline alerts
|
|
m.activeAlerts["docker-host-offline-host1"] = &Alert{ID: "docker-host-offline-host1", Type: "offline"}
|
|
// Add non-docker host alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{ID: "cpu-alert", Type: "cpu"}
|
|
// Add to dockerOfflineCount
|
|
m.dockerOfflineCount["host1"] = 3
|
|
|
|
m.config.DisableAllDockerHostsOffline = true
|
|
|
|
m.mu.Lock()
|
|
m.applyGlobalOfflineSettingsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Docker host alert should be cleared
|
|
if _, exists := m.activeAlerts["docker-host-offline-host1"]; exists {
|
|
t.Error("expected docker-host-offline-host1 to be cleared")
|
|
}
|
|
// Non-docker host alert should remain
|
|
if _, exists := m.activeAlerts["cpu-alert"]; !exists {
|
|
t.Error("expected cpu-alert to remain")
|
|
}
|
|
// dockerOfflineCount should be reset
|
|
if len(m.dockerOfflineCount) != 0 {
|
|
t.Errorf("expected dockerOfflineCount to be empty, got %d entries", len(m.dockerOfflineCount))
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllDockerContainers clears docker container alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add docker container alerts
|
|
m.activeAlerts["docker-container-unhealthy-c1"] = &Alert{ID: "docker-container-unhealthy-c1", Type: "unhealthy"}
|
|
m.activeAlerts["docker-container-exited-c2"] = &Alert{ID: "docker-container-exited-c2", Type: "exited"}
|
|
// Add non-container alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{ID: "cpu-alert", Type: "cpu"}
|
|
// Add tracking state
|
|
m.dockerStateConfirm["c1"] = 2
|
|
m.dockerRestartTracking["c1"] = &dockerRestartRecord{count: 5}
|
|
m.dockerLastExitCode["c1"] = 137
|
|
|
|
m.config.DisableAllDockerContainers = true
|
|
|
|
m.mu.Lock()
|
|
m.applyGlobalOfflineSettingsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Docker container alerts should be cleared
|
|
if _, exists := m.activeAlerts["docker-container-unhealthy-c1"]; exists {
|
|
t.Error("expected docker-container-unhealthy-c1 to be cleared")
|
|
}
|
|
if _, exists := m.activeAlerts["docker-container-exited-c2"]; exists {
|
|
t.Error("expected docker-container-exited-c2 to be cleared")
|
|
}
|
|
// Non-container alert should remain
|
|
if _, exists := m.activeAlerts["cpu-alert"]; !exists {
|
|
t.Error("expected cpu-alert to remain")
|
|
}
|
|
// Tracking state should be reset
|
|
if len(m.dockerStateConfirm) != 0 {
|
|
t.Errorf("expected dockerStateConfirm to be empty, got %d entries", len(m.dockerStateConfirm))
|
|
}
|
|
if len(m.dockerRestartTracking) != 0 {
|
|
t.Errorf("expected dockerRestartTracking to be empty, got %d entries", len(m.dockerRestartTracking))
|
|
}
|
|
if len(m.dockerLastExitCode) != 0 {
|
|
t.Errorf("expected dockerLastExitCode to be empty, got %d entries", len(m.dockerLastExitCode))
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllDockerServices clears docker service alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add docker service alerts
|
|
m.activeAlerts["docker-service-unhealthy-svc1"] = &Alert{ID: "docker-service-unhealthy-svc1", Type: "unhealthy"}
|
|
// Add non-service alert
|
|
m.activeAlerts["cpu-alert"] = &Alert{ID: "cpu-alert", Type: "cpu"}
|
|
|
|
m.config.DisableAllDockerServices = true
|
|
|
|
m.mu.Lock()
|
|
m.applyGlobalOfflineSettingsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Docker service alert should be cleared
|
|
if _, exists := m.activeAlerts["docker-service-unhealthy-svc1"]; exists {
|
|
t.Error("expected docker-service-unhealthy-svc1 to be cleared")
|
|
}
|
|
// Non-service alert should remain
|
|
if _, exists := m.activeAlerts["cpu-alert"]; !exists {
|
|
t.Error("expected cpu-alert to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("no settings enabled does nothing", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add various alerts
|
|
m.activeAlerts["node-offline-node1"] = &Alert{ID: "node-offline-node1", Type: "offline"}
|
|
m.activeAlerts["pbs-offline-pbs1"] = &Alert{ID: "pbs-offline-pbs1", Type: "offline"}
|
|
m.activeAlerts["docker-container-unhealthy-c1"] = &Alert{ID: "docker-container-unhealthy-c1", Type: "unhealthy"}
|
|
|
|
// All disable settings are false by default
|
|
|
|
m.mu.Lock()
|
|
m.applyGlobalOfflineSettingsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// All alerts should remain
|
|
if len(m.activeAlerts) != 3 {
|
|
t.Errorf("expected 3 alerts to remain, got %d", len(m.activeAlerts))
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestHandleHostOffline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("empty host ID returns early", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.config.Enabled = true
|
|
|
|
host := models.Host{ID: "", Hostname: "test-host"}
|
|
m.HandleHostOffline(host)
|
|
|
|
// No alert should be created
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected 0 alerts, got %d", len(m.activeAlerts))
|
|
}
|
|
})
|
|
|
|
t.Run("alerts disabled returns early", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.config.Enabled = false
|
|
|
|
host := models.Host{ID: "host1", Hostname: "test-host"}
|
|
m.HandleHostOffline(host)
|
|
|
|
// No alert should be created
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected 0 alerts, got %d", len(m.activeAlerts))
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllHostsOffline clears alert and returns", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.config.Enabled = true
|
|
m.config.DisableAllHostsOffline = true
|
|
|
|
// Pre-create an alert and confirmation
|
|
alertID := "host-offline-host1"
|
|
m.activeAlerts[alertID] = &Alert{ID: alertID, Type: "host-offline"}
|
|
m.offlineConfirmations["host:host1"] = 5
|
|
|
|
host := models.Host{ID: "host1", Hostname: "test-host"}
|
|
m.HandleHostOffline(host)
|
|
|
|
// Alert should be cleared and confirmations removed
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Error("expected alert to be cleared")
|
|
}
|
|
if _, exists := m.offlineConfirmations["host:host1"]; exists {
|
|
t.Error("expected offlineConfirmations to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("override DisableConnectivity clears alert and returns", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.config.Enabled = true
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"host1": {DisableConnectivity: true},
|
|
}
|
|
|
|
// Pre-create an alert and confirmation
|
|
alertID := "host-offline-host1"
|
|
m.activeAlerts[alertID] = &Alert{ID: alertID, Type: "host-offline"}
|
|
m.offlineConfirmations["host:host1"] = 5
|
|
|
|
host := models.Host{ID: "host1", Hostname: "test-host"}
|
|
m.HandleHostOffline(host)
|
|
|
|
// Alert should be cleared and confirmations removed
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Error("expected alert to be cleared")
|
|
}
|
|
if _, exists := m.offlineConfirmations["host:host1"]; exists {
|
|
t.Error("expected offlineConfirmations to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("override Disabled clears alert and returns", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.config.Enabled = true
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"host1": {Disabled: true},
|
|
}
|
|
|
|
host := models.Host{ID: "host1", Hostname: "test-host"}
|
|
m.HandleHostOffline(host)
|
|
|
|
// No alert should be created
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected 0 alerts, got %d", len(m.activeAlerts))
|
|
}
|
|
})
|
|
|
|
t.Run("existing alert updates LastSeen", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.config.Enabled = true
|
|
|
|
alertID := "host-offline-host1"
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.activeAlerts[alertID] = &Alert{ID: alertID, Type: "host-offline", LastSeen: oldTime}
|
|
|
|
host := models.Host{ID: "host1", Hostname: "test-host"}
|
|
m.HandleHostOffline(host)
|
|
|
|
// LastSeen should be updated
|
|
alert := m.activeAlerts[alertID]
|
|
if alert.LastSeen.Before(time.Now().Add(-1 * time.Minute)) {
|
|
t.Errorf("expected LastSeen to be updated to recent time, got %v", alert.LastSeen)
|
|
}
|
|
})
|
|
|
|
t.Run("insufficient confirmations waits", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.config.Enabled = true
|
|
|
|
host := models.Host{ID: "host1", Hostname: "test-host"}
|
|
|
|
// First two calls should not create alert
|
|
m.HandleHostOffline(host)
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected 0 alerts after 1st call, got %d", len(m.activeAlerts))
|
|
}
|
|
if m.offlineConfirmations["host:host1"] != 1 {
|
|
t.Errorf("expected 1 confirmation, got %d", m.offlineConfirmations["host:host1"])
|
|
}
|
|
|
|
m.HandleHostOffline(host)
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected 0 alerts after 2nd call, got %d", len(m.activeAlerts))
|
|
}
|
|
if m.offlineConfirmations["host:host1"] != 2 {
|
|
t.Errorf("expected 2 confirmations, got %d", m.offlineConfirmations["host:host1"])
|
|
}
|
|
})
|
|
|
|
t.Run("sufficient confirmations creates alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.config.Enabled = true
|
|
|
|
host := models.Host{
|
|
ID: "host1",
|
|
Hostname: "test-host",
|
|
DisplayName: "Test Host",
|
|
Platform: "linux",
|
|
OSName: "Ubuntu",
|
|
OSVersion: "22.04",
|
|
}
|
|
|
|
// Make 3 calls to reach required confirmations
|
|
m.HandleHostOffline(host)
|
|
m.HandleHostOffline(host)
|
|
m.HandleHostOffline(host)
|
|
|
|
// Alert should now be created
|
|
alertID := "host-offline-host1"
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatal("expected alert to be created after 3 confirmations")
|
|
}
|
|
if alert.Type != "host-offline" {
|
|
t.Errorf("expected type 'host-offline', got '%s'", alert.Type)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected level Critical, got '%s'", alert.Level)
|
|
}
|
|
if alert.ResourceName == "" {
|
|
t.Error("expected ResourceName to be set")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestReevaluateActiveAlertsLocked(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("empty alerts map is no-op", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
if len(m.activeAlerts) != 0 {
|
|
t.Errorf("expected 0 alerts, got %d", len(m.activeAlerts))
|
|
}
|
|
})
|
|
|
|
t.Run("alert with insufficient ID parts is skipped", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Alert ID without dash separator
|
|
m.activeAlerts["singlepart"] = &Alert{ID: "singlepart", Type: "cpu", Value: 90}
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Alert should remain (skipped due to ID format)
|
|
if _, exists := m.activeAlerts["singlepart"]; !exists {
|
|
t.Error("expected singlepart alert to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllPMG resolves PMG queue alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add PMG queue alert
|
|
m.activeAlerts["pmg-queue-cpu"] = &Alert{
|
|
ID: "pmg-queue-cpu",
|
|
Type: "queue-depth",
|
|
}
|
|
|
|
m.config.DisableAllPMG = true
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// PMG alert should be resolved
|
|
if _, exists := m.activeAlerts["pmg-queue-cpu"]; exists {
|
|
t.Error("expected PMG alert to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllHosts resolves Host alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add host alert with resourceType metadata
|
|
m.activeAlerts["host-1-cpu"] = &Alert{
|
|
ID: "host-1-cpu",
|
|
Type: "cpu",
|
|
Value: 90,
|
|
Metadata: map[string]interface{}{
|
|
"resourceType": "Host",
|
|
},
|
|
}
|
|
|
|
m.config.DisableAllHosts = true
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Host alert should be resolved
|
|
if _, exists := m.activeAlerts["host-1-cpu"]; exists {
|
|
t.Error("expected Host alert to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("Docker host offline alerts are skipped", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add docker host offline alert
|
|
m.activeAlerts["docker-host-1-offline"] = &Alert{
|
|
ID: "docker-host-1-offline",
|
|
Type: "docker-host-offline",
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Docker host offline alert should remain (skipped)
|
|
if _, exists := m.activeAlerts["docker-host-1-offline"]; !exists {
|
|
t.Error("expected docker-host-offline alert to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllDockerHosts resolves dockerhost alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add dockerhost metric alert
|
|
m.activeAlerts["dockerhost-1-cpu"] = &Alert{
|
|
ID: "dockerhost-1-cpu",
|
|
Type: "cpu",
|
|
Value: 90,
|
|
Metadata: map[string]interface{}{
|
|
"resourceType": "dockerhost",
|
|
},
|
|
}
|
|
|
|
m.config.DisableAllDockerHosts = true
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Dockerhost alert should be resolved
|
|
if _, exists := m.activeAlerts["dockerhost-1-cpu"]; exists {
|
|
t.Error("expected dockerhost alert to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllNodes resolves Node alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add node alert with Instance = "Node"
|
|
m.activeAlerts["node1-cpu"] = &Alert{
|
|
ID: "node1-cpu",
|
|
Type: "cpu",
|
|
Value: 90,
|
|
Instance: "Node",
|
|
}
|
|
|
|
m.config.DisableAllNodes = true
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Node alert should be resolved
|
|
if _, exists := m.activeAlerts["node1-cpu"]; exists {
|
|
t.Error("expected Node alert to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllStorage resolves Storage alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add storage alert with Instance = "Storage"
|
|
m.activeAlerts["storage1-usage"] = &Alert{
|
|
ID: "storage1-usage",
|
|
Type: "usage",
|
|
Value: 90,
|
|
Instance: "Storage",
|
|
}
|
|
|
|
m.config.DisableAllStorage = true
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Storage alert should be resolved
|
|
if _, exists := m.activeAlerts["storage1-usage"]; exists {
|
|
t.Error("expected Storage alert to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllPBS resolves PBS alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add PBS alert with Instance = "PBS"
|
|
m.activeAlerts["pbs1-cpu"] = &Alert{
|
|
ID: "pbs1-cpu",
|
|
Type: "cpu",
|
|
Value: 90,
|
|
Instance: "PBS",
|
|
}
|
|
|
|
m.config.DisableAllPBS = true
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// PBS alert should be resolved
|
|
if _, exists := m.activeAlerts["pbs1-cpu"]; exists {
|
|
t.Error("expected PBS alert to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllGuests resolves Guest alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add guest alert with Instance set to something other than "Node"/"Storage"/"PBS"
|
|
// Note: If both Instance and Node are empty, it matches the node branch
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 90,
|
|
Instance: "qemu/100", // Guest instance
|
|
Node: "pve1", // Different from Instance, so doesn't match node branch
|
|
}
|
|
|
|
m.config.DisableAllGuests = true
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Guest alert should be resolved
|
|
if _, exists := m.activeAlerts["guest1-cpu"]; exists {
|
|
t.Error("expected Guest alert to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("alert with disabled override is resolved", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add guest alert with override
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 90,
|
|
Instance: "qemu/100",
|
|
Node: "pve1",
|
|
}
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"guest1": {Disabled: true},
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Alert should be resolved due to disabled override
|
|
if _, exists := m.activeAlerts["guest1-cpu"]; exists {
|
|
t.Error("expected alert with disabled override to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("alert below clear threshold is resolved", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add guest alert below new clear threshold
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 70, // Below clear threshold
|
|
Threshold: 80,
|
|
Instance: "qemu/100",
|
|
Node: "pve1",
|
|
}
|
|
m.config.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 80, Clear: 75}
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Alert should be resolved (value 70 < clear 75)
|
|
if _, exists := m.activeAlerts["guest1-cpu"]; exists {
|
|
t.Error("expected alert below clear threshold to be resolved")
|
|
}
|
|
})
|
|
|
|
t.Run("alert between clear and trigger is resolved on config change", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Add guest alert between clear and new higher trigger
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 85, // Between clear (75) and new trigger (90)
|
|
Threshold: 80,
|
|
Instance: "qemu/100",
|
|
Node: "pve1",
|
|
}
|
|
m.config.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 90, Clear: 75}
|
|
|
|
m.mu.Lock()
|
|
m.reevaluateActiveAlertsLocked()
|
|
m.mu.Unlock()
|
|
|
|
// Alert should be resolved (value 85 < trigger 90)
|
|
if _, exists := m.activeAlerts["guest1-cpu"]; exists {
|
|
t.Error("expected alert between thresholds to be resolved")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestHandleHostRemoved(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("empty host ID is no-op", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.activeAlerts["host-offline-host1"] = &Alert{ID: "host-offline-host1"}
|
|
m.mu.Unlock()
|
|
|
|
// Empty ID host
|
|
m.HandleHostRemoved(models.Host{ID: ""})
|
|
|
|
// Alert should still exist
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["host-offline-host1"]
|
|
m.mu.RUnlock()
|
|
if !exists {
|
|
t.Error("expected alert to remain when empty host ID passed")
|
|
}
|
|
})
|
|
|
|
t.Run("clears host offline alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["host-offline-host1"] = &Alert{
|
|
ID: "host-offline-host1",
|
|
ResourceID: "host:host1",
|
|
}
|
|
m.offlineConfirmations["host:host1"] = 5
|
|
m.mu.Unlock()
|
|
|
|
m.HandleHostRemoved(models.Host{ID: "host1", Hostname: "testhost"})
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["host-offline-host1"]
|
|
_, confirmExists := m.offlineConfirmations["host:host1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected host offline alert to be cleared")
|
|
}
|
|
if confirmExists {
|
|
t.Error("expected offline confirmations to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("clears host metric alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
// Add CPU and memory alerts for host
|
|
m.activeAlerts["host:host1-cpu"] = &Alert{
|
|
ID: "host:host1-cpu",
|
|
ResourceID: "host:host1",
|
|
}
|
|
m.activeAlerts["host:host1-memory"] = &Alert{
|
|
ID: "host:host1-memory",
|
|
ResourceID: "host:host1",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.HandleHostRemoved(models.Host{ID: "host1", Hostname: "testhost"})
|
|
|
|
m.mu.RLock()
|
|
_, cpuExists := m.activeAlerts["host:host1-cpu"]
|
|
_, memExists := m.activeAlerts["host:host1-memory"]
|
|
m.mu.RUnlock()
|
|
|
|
if cpuExists {
|
|
t.Error("expected host CPU alert to be cleared")
|
|
}
|
|
if memExists {
|
|
t.Error("expected host memory alert to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("clears host disk alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
// Add disk alerts for host
|
|
m.activeAlerts["host:host1/disk:sda-usage"] = &Alert{
|
|
ID: "host:host1/disk:sda-usage",
|
|
ResourceID: "host:host1/disk:sda",
|
|
}
|
|
m.activeAlerts["host:host1/disk:sdb-usage"] = &Alert{
|
|
ID: "host:host1/disk:sdb-usage",
|
|
ResourceID: "host:host1/disk:sdb",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.HandleHostRemoved(models.Host{ID: "host1", Hostname: "testhost"})
|
|
|
|
m.mu.RLock()
|
|
_, sda := m.activeAlerts["host:host1/disk:sda-usage"]
|
|
_, sdb := m.activeAlerts["host:host1/disk:sdb-usage"]
|
|
m.mu.RUnlock()
|
|
|
|
if sda {
|
|
t.Error("expected host disk sda alert to be cleared")
|
|
}
|
|
if sdb {
|
|
t.Error("expected host disk sdb alert to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("clears all alert types together", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
// Add multiple alert types
|
|
m.activeAlerts["host-offline-host1"] = &Alert{ID: "host-offline-host1", ResourceID: "host:host1"}
|
|
m.activeAlerts["host:host1-cpu"] = &Alert{ID: "host:host1-cpu", ResourceID: "host:host1"}
|
|
m.activeAlerts["host:host1-memory"] = &Alert{ID: "host:host1-memory", ResourceID: "host:host1"}
|
|
m.activeAlerts["host:host1/disk:sda-usage"] = &Alert{ID: "host:host1/disk:sda-usage", ResourceID: "host:host1/disk:sda"}
|
|
m.offlineConfirmations["host:host1"] = 3
|
|
m.mu.Unlock()
|
|
|
|
m.HandleHostRemoved(models.Host{ID: "host1", Hostname: "testhost"})
|
|
|
|
m.mu.RLock()
|
|
alertCount := 0
|
|
for id := range m.activeAlerts {
|
|
if strings.Contains(id, "host1") {
|
|
alertCount++
|
|
}
|
|
}
|
|
_, confirmExists := m.offlineConfirmations["host:host1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertCount > 0 {
|
|
t.Errorf("expected all host1 alerts to be cleared, got %d remaining", alertCount)
|
|
}
|
|
if confirmExists {
|
|
t.Error("expected offline confirmations to be cleared")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestReevaluateGuestAlert(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("no active alerts is no-op", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 80, Clear: 70}
|
|
m.mu.Unlock()
|
|
|
|
// No alerts exist - should not panic
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
count := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
if count != 0 {
|
|
t.Errorf("expected 0 alerts, got %d", count)
|
|
}
|
|
})
|
|
|
|
t.Run("clears alert when threshold disabled (nil)", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 90,
|
|
}
|
|
m.config.GuestDefaults.CPU = nil // Disabled
|
|
m.mu.Unlock()
|
|
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["guest1-cpu"]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Error("expected alert to be cleared when threshold is nil")
|
|
}
|
|
})
|
|
|
|
t.Run("clears alert when trigger is zero", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["guest1-memory"] = &Alert{
|
|
ID: "guest1-memory",
|
|
Type: "memory",
|
|
Value: 85,
|
|
}
|
|
m.config.GuestDefaults.Memory = &HysteresisThreshold{Trigger: 0, Clear: 0}
|
|
m.mu.Unlock()
|
|
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["guest1-memory"]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Error("expected alert to be cleared when trigger is 0")
|
|
}
|
|
})
|
|
|
|
t.Run("clears alert when value below clear threshold", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 65, // Below clear threshold of 70
|
|
}
|
|
m.config.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 80, Clear: 70}
|
|
m.mu.Unlock()
|
|
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["guest1-cpu"]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Error("expected alert to be cleared when value below clear threshold")
|
|
}
|
|
})
|
|
|
|
t.Run("clears alert when value below trigger threshold", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["guest1-disk"] = &Alert{
|
|
ID: "guest1-disk",
|
|
Type: "disk",
|
|
Value: 75, // Below trigger of 80
|
|
}
|
|
m.config.GuestDefaults.Disk = &HysteresisThreshold{Trigger: 80, Clear: 70}
|
|
m.mu.Unlock()
|
|
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["guest1-disk"]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Error("expected alert to be cleared when value below trigger")
|
|
}
|
|
})
|
|
|
|
t.Run("keeps alert when value above both thresholds", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 90, // Above both trigger (80) and clear (70)
|
|
}
|
|
m.config.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 80, Clear: 70}
|
|
m.mu.Unlock()
|
|
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["guest1-cpu"]
|
|
m.mu.RUnlock()
|
|
if !exists {
|
|
t.Error("expected alert to remain when value above thresholds")
|
|
}
|
|
})
|
|
|
|
t.Run("processes all metric types", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
// Add alerts for all metric types with values below threshold
|
|
metrics := []string{"cpu", "memory", "disk", "diskRead", "diskWrite", "networkIn", "networkOut"}
|
|
for _, metric := range metrics {
|
|
m.activeAlerts[fmt.Sprintf("guest1-%s", metric)] = &Alert{
|
|
ID: fmt.Sprintf("guest1-%s", metric),
|
|
Type: metric,
|
|
Value: 50, // Below threshold
|
|
}
|
|
}
|
|
threshold := &HysteresisThreshold{Trigger: 80, Clear: 70}
|
|
m.config.GuestDefaults.CPU = threshold
|
|
m.config.GuestDefaults.Memory = threshold
|
|
m.config.GuestDefaults.Disk = threshold
|
|
m.config.GuestDefaults.DiskRead = threshold
|
|
m.config.GuestDefaults.DiskWrite = threshold
|
|
m.config.GuestDefaults.NetworkIn = threshold
|
|
m.config.GuestDefaults.NetworkOut = threshold
|
|
m.mu.Unlock()
|
|
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
remaining := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
if remaining != 0 {
|
|
t.Errorf("expected all alerts to be cleared, got %d remaining", remaining)
|
|
}
|
|
})
|
|
|
|
t.Run("clears pending alert when threshold disabled", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 90,
|
|
}
|
|
m.pendingAlerts["guest1-cpu"] = time.Now() // pendingAlerts is map[string]time.Time
|
|
m.config.GuestDefaults.CPU = nil // Disabled
|
|
m.mu.Unlock()
|
|
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["guest1-cpu"]
|
|
_, pendingExists := m.pendingAlerts["guest1-cpu"]
|
|
m.mu.RUnlock()
|
|
if alertExists {
|
|
t.Error("expected active alert to be cleared")
|
|
}
|
|
if pendingExists {
|
|
t.Error("expected pending alert to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("uses clear equals trigger when clear is zero", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 75, // Below trigger of 80
|
|
}
|
|
// Clear is 0, so it should use trigger (80) as clear threshold
|
|
m.config.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 80, Clear: 0}
|
|
m.mu.Unlock()
|
|
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["guest1-cpu"]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Error("expected alert to be cleared when value below trigger (used as clear)")
|
|
}
|
|
})
|
|
|
|
t.Run("ignores alerts for different guests", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["guest1-cpu"] = &Alert{
|
|
ID: "guest1-cpu",
|
|
Type: "cpu",
|
|
Value: 50, // Below threshold
|
|
}
|
|
m.activeAlerts["guest2-cpu"] = &Alert{
|
|
ID: "guest2-cpu",
|
|
Type: "cpu",
|
|
Value: 50, // Below threshold
|
|
}
|
|
m.config.GuestDefaults.CPU = &HysteresisThreshold{Trigger: 80, Clear: 70}
|
|
m.mu.Unlock()
|
|
|
|
// Only reevaluate guest1
|
|
m.ReevaluateGuestAlert(nil, "guest1")
|
|
|
|
m.mu.RLock()
|
|
_, guest1Exists := m.activeAlerts["guest1-cpu"]
|
|
_, guest2Exists := m.activeAlerts["guest2-cpu"]
|
|
m.mu.RUnlock()
|
|
|
|
if guest1Exists {
|
|
t.Error("expected guest1 alert to be cleared")
|
|
}
|
|
if !guest2Exists {
|
|
t.Error("expected guest2 alert to remain (not reevaluated)")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestHandleDockerHostOffline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("empty host ID is no-op", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
initialCount := len(m.activeAlerts)
|
|
m.mu.Unlock()
|
|
|
|
m.HandleDockerHostOffline(models.DockerHost{ID: ""})
|
|
|
|
m.mu.RLock()
|
|
finalCount := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
if finalCount != initialCount {
|
|
t.Error("expected no change when empty host ID passed")
|
|
}
|
|
})
|
|
|
|
t.Run("disabled alerts is no-op", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = false
|
|
m.mu.Unlock()
|
|
|
|
m.HandleDockerHostOffline(models.DockerHost{ID: "docker1", DisplayName: "Docker Host 1"})
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["docker-host-offline-docker1"]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Error("expected no alert when alerts are disabled")
|
|
}
|
|
})
|
|
|
|
t.Run("DisableAllDockerHostsOffline clears tracking and alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.DisableAllDockerHostsOffline = true
|
|
m.dockerOfflineCount["docker1"] = 5
|
|
m.activeAlerts["docker-host-offline-docker1"] = &Alert{ID: "docker-host-offline-docker1"}
|
|
m.mu.Unlock()
|
|
|
|
m.HandleDockerHostOffline(models.DockerHost{ID: "docker1", DisplayName: "Docker Host 1"})
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["docker-host-offline-docker1"]
|
|
_, countExists := m.dockerOfflineCount["docker1"]
|
|
m.mu.RUnlock()
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared")
|
|
}
|
|
if countExists {
|
|
t.Error("expected offline count to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("override DisableConnectivity clears tracking and alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"docker1": {DisableConnectivity: true},
|
|
}
|
|
m.dockerOfflineCount["docker1"] = 3
|
|
m.activeAlerts["docker-host-offline-docker1"] = &Alert{ID: "docker-host-offline-docker1"}
|
|
m.mu.Unlock()
|
|
|
|
m.HandleDockerHostOffline(models.DockerHost{ID: "docker1", DisplayName: "Docker Host 1"})
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["docker-host-offline-docker1"]
|
|
_, countExists := m.dockerOfflineCount["docker1"]
|
|
m.mu.RUnlock()
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared with override")
|
|
}
|
|
if countExists {
|
|
t.Error("expected offline count to be cleared with override")
|
|
}
|
|
})
|
|
|
|
t.Run("existing alert updates LastSeen", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.activeAlerts["docker-host-offline-docker1"] = &Alert{
|
|
ID: "docker-host-offline-docker1",
|
|
LastSeen: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.HandleDockerHostOffline(models.DockerHost{ID: "docker1", DisplayName: "Docker Host 1"})
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["docker-host-offline-docker1"]
|
|
m.mu.RUnlock()
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
	t.Run("requires 3 confirmations before alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.Enabled = true
		m.mu.Unlock()

		host := models.DockerHost{ID: "docker1", DisplayName: "Docker Host 1", Hostname: "docker-server"}

		// First call - confirmation 1
		m.HandleDockerHostOffline(host)
		m.mu.RLock()
		count1 := m.dockerOfflineCount["docker1"]
		alert1 := m.activeAlerts["docker-host-offline-docker1"]
		m.mu.RUnlock()
		if count1 != 1 {
			t.Errorf("expected count 1, got %d", count1)
		}
		if alert1 != nil {
			t.Error("expected no alert after 1 confirmation")
		}

		// Second call - confirmation 2
		m.HandleDockerHostOffline(host)
		m.mu.RLock()
		count2 := m.dockerOfflineCount["docker1"]
		alert2 := m.activeAlerts["docker-host-offline-docker1"]
		m.mu.RUnlock()
		if count2 != 2 {
			t.Errorf("expected count 2, got %d", count2)
		}
		if alert2 != nil {
			t.Error("expected no alert after 2 confirmations")
		}

		// Third call - confirmation 3 - should create alert
		m.HandleDockerHostOffline(host)
		m.mu.RLock()
		count3 := m.dockerOfflineCount["docker1"]
		alert3 := m.activeAlerts["docker-host-offline-docker1"]
		m.mu.RUnlock()
		if count3 != 3 {
			t.Errorf("expected count 3, got %d", count3)
		}
		if alert3 == nil {
			t.Fatal("expected alert after 3 confirmations")
		}
		if alert3.Type != "docker-host-offline" {
			t.Errorf("expected type docker-host-offline, got %s", alert3.Type)
		}
		if alert3.Level != AlertLevelCritical {
			t.Errorf("expected critical level, got %s", alert3.Level)
		}
	})

	t.Run("alert has correct metadata", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.Enabled = true
		m.dockerOfflineCount["docker1"] = 2 // Pre-set to trigger on next call
		m.mu.Unlock()

		host := models.DockerHost{
			ID:          "docker1",
			DisplayName: "My Docker Host",
			Hostname:    "docker-server.local",
			AgentID:     "agent-123",
		}

		m.HandleDockerHostOffline(host)

		m.mu.RLock()
		alert := m.activeAlerts["docker-host-offline-docker1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected alert to be created")
		}
		if alert.ResourceID != "docker:docker1" {
			t.Errorf("expected resourceID docker:docker1, got %s", alert.ResourceID)
		}
		if alert.ResourceName != "My Docker Host" {
			t.Errorf("expected resourceName 'My Docker Host', got %s", alert.ResourceName)
		}
		if alert.Node != "docker-server.local" {
			t.Errorf("expected node docker-server.local, got %s", alert.Node)
		}
		if alert.Metadata["resourceType"] != "DockerHost" {
			t.Errorf("expected metadata resourceType DockerHost, got %v", alert.Metadata["resourceType"])
		}
		if alert.Metadata["hostId"] != "docker1" {
			t.Errorf("expected metadata hostId docker1, got %v", alert.Metadata["hostId"])
		}
		if alert.Metadata["agentId"] != "agent-123" {
			t.Errorf("expected metadata agentId agent-123, got %v", alert.Metadata["agentId"])
		}
	})
}

func TestSetMetricHooks(t *testing.T) {
	// NOT parallel - modifies package-level state
	// Save existing state and restore after test
	oldFired := recordAlertFired
	oldResolved := recordAlertResolved
	oldSuppressed := recordAlertSuppressed
	oldAcknowledged := recordAlertAcknowledged
	defer func() {
		recordAlertFired = oldFired
		recordAlertResolved = oldResolved
		recordAlertSuppressed = oldSuppressed
		recordAlertAcknowledged = oldAcknowledged
	}()

	t.Run("sets all hooks", func(t *testing.T) {
		var firedCalled, resolvedCalled, suppressedCalled, acknowledgedCalled bool

		SetMetricHooks(
			func(a *Alert) { firedCalled = true },
			func(a *Alert) { resolvedCalled = true },
			func(s string) { suppressedCalled = true },
			func() { acknowledgedCalled = true },
		)

		// Verify hooks are set by calling them (if they were nil, this would panic)
		if recordAlertFired != nil {
			recordAlertFired(&Alert{})
		}
		if recordAlertResolved != nil {
			recordAlertResolved(&Alert{})
		}
		if recordAlertSuppressed != nil {
			recordAlertSuppressed("test")
		}
		if recordAlertAcknowledged != nil {
			recordAlertAcknowledged()
		}

		if !firedCalled {
			t.Error("expected fired hook to be called")
		}
		if !resolvedCalled {
			t.Error("expected resolved hook to be called")
		}
		if !suppressedCalled {
			t.Error("expected suppressed hook to be called")
		}
		if !acknowledgedCalled {
			t.Error("expected acknowledged hook to be called")
		}
	})

	t.Run("nil hooks are safe", func(t *testing.T) {
		SetMetricHooks(nil, nil, nil, nil)

		// Should not panic
		if recordAlertFired != nil {
			t.Error("expected fired hook to be nil")
		}
		if recordAlertResolved != nil {
			t.Error("expected resolved hook to be nil")
		}
	})
}

func TestNotifyExistingAlert(t *testing.T) {
	// t.Parallel()

	t.Run("non-existent alert is no-op", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Should not panic
		m.NotifyExistingAlert("non-existent-alert")
	})

	t.Run("existing alert dispatches notification", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		dispatchedCh := make(chan bool, 1)
		m.SetAlertCallback(func(a *Alert) {
			dispatchedCh <- true
		})

		m.mu.Lock()
		m.config.Enabled = true
		m.config.ActivationState = ActivationActive // Must be active to dispatch
		m.activeAlerts["test-alert"] = &Alert{
			ID:    "test-alert",
			Type:  "test",
			Level: AlertLevelWarning,
		}
		m.mu.Unlock()

		m.NotifyExistingAlert("test-alert")

		// Wait for async dispatch with timeout
		select {
		case <-dispatchedCh:
			// Success
		case <-time.After(1 * time.Second):
			t.Error("expected alert callback to be called (timeout)")
		}
	})
}

func TestGetResolvedAlert(t *testing.T) {
	// t.Parallel()

	t.Run("returns nil for non-existent alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		result := m.GetResolvedAlert("non-existent")
		if result != nil {
			t.Error("expected nil for non-existent alert")
		}
	})

	t.Run("returns nil for nil resolved entry", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.resolvedMutex.Lock()
		m.recentlyResolved["test"] = nil
		m.resolvedMutex.Unlock()

		result := m.GetResolvedAlert("test")
		if result != nil {
			t.Error("expected nil for nil resolved entry")
		}
	})

	t.Run("returns nil when Alert is nil", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.resolvedMutex.Lock()
		m.recentlyResolved["test"] = &ResolvedAlert{Alert: nil}
		m.resolvedMutex.Unlock()

		result := m.GetResolvedAlert("test")
		if result != nil {
			t.Error("expected nil when Alert is nil")
		}
	})

	t.Run("returns cloned alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		resolvedTime := time.Now()
		m.resolvedMutex.Lock()
		m.recentlyResolved["test"] = &ResolvedAlert{
			Alert: &Alert{
				ID:           "test",
				Type:         "cpu",
				Level:        AlertLevelWarning,
				ResourceID:   "res1",
				ResourceName: "Resource 1",
			},
			ResolvedTime: resolvedTime,
		}
		m.resolvedMutex.Unlock()

		result := m.GetResolvedAlert("test")
		if result == nil {
			t.Fatal("expected non-nil result")
		}
		if result.Alert.ID != "test" {
			t.Errorf("expected ID test, got %s", result.Alert.ID)
		}
		if result.ResolvedTime != resolvedTime {
			t.Error("expected resolved time to match")
		}
	})
}

func TestGetAlertHistory(t *testing.T) {
	// t.Parallel()

	t.Run("returns history from history manager", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Add some alerts to history
		m.historyManager.AddAlert(Alert{ID: "alert1", Type: "cpu"})
		m.historyManager.AddAlert(Alert{ID: "alert2", Type: "memory"})

		history := m.GetAlertHistory(10)
		if len(history) < 2 {
			t.Errorf("expected at least 2 history entries, got %d", len(history))
		}
	})

	t.Run("respects limit", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Add alerts
		for i := 0; i < 5; i++ {
			m.historyManager.AddAlert(Alert{ID: fmt.Sprintf("alert%d", i), Type: "test"})
		}

		history := m.GetAlertHistory(2)
		if len(history) > 2 {
			t.Errorf("expected max 2 entries, got %d", len(history))
		}
	})
}

func TestGetAlertHistorySince(t *testing.T) {
	// t.Parallel()

	t.Run("zero time returns all history", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.historyManager.AddAlert(Alert{ID: "alert1", Type: "cpu"})

		history := m.GetAlertHistorySince(time.Time{}, 10)
		if len(history) == 0 {
			t.Error("expected history entries for zero time")
		}
	})

	t.Run("filters by time", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Add an alert
		m.historyManager.AddAlert(Alert{ID: "alert1", Type: "cpu", StartTime: time.Now()})

		// Query for alerts after now + 1 hour (should return none)
		future := time.Now().Add(1 * time.Hour)
		history := m.GetAlertHistorySince(future, 10)
		if len(history) != 0 {
			t.Errorf("expected 0 entries for future time, got %d", len(history))
		}
	})
}

func TestClearAlertHistory(t *testing.T) {
	// t.Parallel()

	t.Run("clears all history", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Add some alerts
		m.historyManager.AddAlert(Alert{ID: "alert1", Type: "cpu"})
		m.historyManager.AddAlert(Alert{ID: "alert2", Type: "memory"})

		err := m.ClearAlertHistory()
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}

		history := m.GetAlertHistory(10)
		if len(history) != 0 {
			t.Errorf("expected 0 entries after clear, got %d", len(history))
		}
	})
}

func TestClearNodeOfflineAlert(t *testing.T) {
	// t.Parallel()

	t.Run("no alert and no count is no-op", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		node := models.Node{ID: "node1", Name: "Node 1"}
		m.clearNodeOfflineAlert(node)

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()
		if alertCount != 0 {
			t.Errorf("expected 0 alerts, got %d", alertCount)
		}
	})

	t.Run("resets offline count when node comes online", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.nodeOfflineCount["node1"] = 5
		m.mu.Unlock()

		node := models.Node{ID: "node1", Name: "Node 1"}
		m.clearNodeOfflineAlert(node)

		m.mu.RLock()
		_, exists := m.nodeOfflineCount["node1"]
		m.mu.RUnlock()
		if exists {
			t.Error("expected offline count to be cleared")
		}
	})

	t.Run("clears existing alert and adds to resolved", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		resolvedCh := make(chan struct{}, 1)
		m.SetResolvedCallback(func(alertID string) {
			resolvedCh <- struct{}{}
		})

		m.mu.Lock()
		m.nodeOfflineCount["node1"] = 3
		m.activeAlerts["node-offline-node1"] = &Alert{
			ID:        "node-offline-node1",
			Type:      "offline",
			StartTime: time.Now().Add(-10 * time.Minute),
		}
		m.mu.Unlock()

		node := models.Node{ID: "node1", Name: "Node 1", Instance: "pve1"}
		m.clearNodeOfflineAlert(node)

		m.mu.RLock()
		_, alertExists := m.activeAlerts["node-offline-node1"]
		_, countExists := m.nodeOfflineCount["node1"]
		m.mu.RUnlock()

		if alertExists {
			t.Error("expected alert to be cleared")
		}
		if countExists {
			t.Error("expected offline count to be cleared")
		}

		// Check resolved
		m.resolvedMutex.RLock()
		resolved := m.recentlyResolved["node-offline-node1"]
		m.resolvedMutex.RUnlock()
		if resolved == nil {
			t.Error("expected alert to be added to recently resolved")
		}
		select {
		case <-resolvedCh:
		case <-time.After(2 * time.Second):
			t.Error("expected resolved callback to be called")
		}
	})
}

// TestClearOfflineAlertNoDeadlock is a regression test for a deadlock introduced
// by commit 07b4765b. The resolved callback (handleAlertResolved) calls
// ShouldSuppressResolvedNotification which acquires m.mu.RLock(). If the
// clear*OfflineAlert functions call the callback synchronously while holding
// m.mu.Lock(), Go's non-reentrant RWMutex deadlocks.
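//
// For illustration only (hypothetical names, not the exact production code),
// the problematic shape looks roughly like:
//
//	m.mu.Lock()
//	delete(m.activeAlerts, alertID)
//	onResolved(alertID) // callback re-acquires m.mu.RLock() -> deadlock
//	m.mu.Unlock()
//
// The clear functions must therefore invoke the callback outside the critical
// section; the cases below verify that they do.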
func TestClearOfflineAlertNoDeadlock(t *testing.T) {
	// t.Parallel()

	type testCase struct {
		name    string
		setupFn func(m *Manager)
		clearFn func(m *Manager)
	}

	cases := []testCase{
		{
			name: "clearNodeOfflineAlert",
			setupFn: func(m *Manager) {
				m.mu.Lock()
				m.activeAlerts["node-offline-node1"] = &Alert{
					ID:        "node-offline-node1",
					Type:      "offline",
					StartTime: time.Now().Add(-5 * time.Minute),
				}
				m.mu.Unlock()
			},
			clearFn: func(m *Manager) {
				m.clearNodeOfflineAlert(models.Node{ID: "node1", Name: "Node 1", Instance: "pve1"})
			},
		},
		{
			name: "clearPBSOfflineAlert",
			setupFn: func(m *Manager) {
				m.mu.Lock()
				m.activeAlerts["pbs-offline-pbs1"] = &Alert{
					ID:        "pbs-offline-pbs1",
					Type:      "offline",
					StartTime: time.Now().Add(-5 * time.Minute),
				}
				m.mu.Unlock()
			},
			clearFn: func(m *Manager) {
				m.clearPBSOfflineAlert(models.PBSInstance{ID: "pbs1", Name: "PBS 1", Host: "host1"})
			},
		},
		{
			name: "clearPMGOfflineAlert",
			setupFn: func(m *Manager) {
				m.mu.Lock()
				m.activeAlerts["pmg-offline-pmg1"] = &Alert{
					ID:        "pmg-offline-pmg1",
					Type:      "offline",
					StartTime: time.Now().Add(-5 * time.Minute),
				}
				m.mu.Unlock()
			},
			clearFn: func(m *Manager) {
				m.clearPMGOfflineAlert(models.PMGInstance{ID: "pmg1", Name: "PMG 1", Host: "host1"})
			},
		},
		{
			name: "clearStorageOfflineAlert",
			setupFn: func(m *Manager) {
				m.mu.Lock()
				m.activeAlerts["storage-offline-stor1"] = &Alert{
					ID:        "storage-offline-stor1",
					Type:      "offline",
					StartTime: time.Now().Add(-5 * time.Minute),
				}
				m.mu.Unlock()
			},
			clearFn: func(m *Manager) {
				m.clearStorageOfflineAlert(models.Storage{ID: "stor1", Name: "Storage 1", Node: "node1"})
			},
		},
		{
			name: "clearGuestPoweredOffAlert",
			setupFn: func(m *Manager) {
				m.mu.Lock()
				m.activeAlerts["guest-powered-off-vm100"] = &Alert{
					ID:        "guest-powered-off-vm100",
					Type:      "powered-off",
					StartTime: time.Now().Add(-5 * time.Minute),
				}
				m.mu.Unlock()
			},
			clearFn: func(m *Manager) {
				m.clearGuestPoweredOffAlert("vm100", "TestVM")
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			m := newTestManager(t)

			// Simulate what handleAlertResolved does in production:
			// it calls ShouldSuppressResolvedNotification which acquires m.mu.RLock().
			// Before the fix, this deadlocked because the caller held m.mu.Lock().
			done := make(chan struct{})
			m.SetResolvedCallback(func(alertID string) {
				_ = m.ShouldSuppressResolvedNotification(&Alert{ID: alertID})
				close(done)
			})

			tc.setupFn(m)
			tc.clearFn(m)

			select {
			case <-done:
				// Callback completed without deadlock
			case <-time.After(3 * time.Second):
				t.Fatal("deadlock: resolved callback did not complete within 3 seconds")
			}
		})
	}
}

func TestClearPBSOfflineAlert(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("no alert and no count is no-op", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1"}
|
|
m.clearPBSOfflineAlert(pbs)
|
|
|
|
m.mu.RLock()
|
|
alertCount := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
if alertCount != 0 {
|
|
t.Errorf("expected 0 alerts, got %d", alertCount)
|
|
}
|
|
})
|
|
|
|
t.Run("resets offline confirmation count", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["pbs1"] = 5
|
|
m.mu.Unlock()
|
|
|
|
pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1"}
|
|
m.clearPBSOfflineAlert(pbs)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.offlineConfirmations["pbs1"]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Error("expected offline confirmation count to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("clears existing alert and adds to resolved", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
resolvedCh := make(chan struct{}, 1)
|
|
m.SetResolvedCallback(func(alertID string) {
|
|
resolvedCh <- struct{}{}
|
|
})
|
|
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["pbs1"] = 3
|
|
m.activeAlerts["pbs-offline-pbs1"] = &Alert{
|
|
ID: "pbs-offline-pbs1",
|
|
Type: "offline",
|
|
StartTime: time.Now().Add(-5 * time.Minute),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1", Host: "pbs.local"}
|
|
m.clearPBSOfflineAlert(pbs)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["pbs-offline-pbs1"]
|
|
_, countExists := m.offlineConfirmations["pbs1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared")
|
|
}
|
|
if countExists {
|
|
t.Error("expected offline confirmation count to be cleared")
|
|
}
|
|
|
|
// Check resolved
|
|
m.resolvedMutex.RLock()
|
|
resolved := m.recentlyResolved["pbs-offline-pbs1"]
|
|
m.resolvedMutex.RUnlock()
|
|
if resolved == nil {
|
|
t.Error("expected alert to be added to recently resolved")
|
|
}
|
|
select {
|
|
case <-resolvedCh:
|
|
case <-time.After(2 * time.Second):
|
|
t.Error("expected resolved callback to be called")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestClearPMGOfflineAlert(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("no alert and no count is no-op", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1"}
|
|
m.clearPMGOfflineAlert(pmg)
|
|
|
|
m.mu.RLock()
|
|
alertCount := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
if alertCount != 0 {
|
|
t.Errorf("expected 0 alerts, got %d", alertCount)
|
|
}
|
|
})
|
|
|
|
t.Run("resets offline confirmation count", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["pmg1"] = 5
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1"}
|
|
m.clearPMGOfflineAlert(pmg)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.offlineConfirmations["pmg1"]
|
|
m.mu.RUnlock()
|
|
if exists {
|
|
t.Error("expected offline confirmation count to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("clears existing alert and adds to resolved", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
resolvedCh := make(chan struct{}, 1)
|
|
m.SetResolvedCallback(func(alertID string) {
|
|
resolvedCh <- struct{}{}
|
|
})
|
|
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["pmg1"] = 3
|
|
m.activeAlerts["pmg-offline-pmg1"] = &Alert{
|
|
ID: "pmg-offline-pmg1",
|
|
Type: "offline",
|
|
StartTime: time.Now().Add(-5 * time.Minute),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1", Host: "pmg.local"}
|
|
m.clearPMGOfflineAlert(pmg)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["pmg-offline-pmg1"]
|
|
_, countExists := m.offlineConfirmations["pmg1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared")
|
|
}
|
|
if countExists {
|
|
t.Error("expected offline confirmation count to be cleared")
|
|
}
|
|
|
|
// Check resolved
|
|
m.resolvedMutex.RLock()
|
|
resolved := m.recentlyResolved["pmg-offline-pmg1"]
|
|
m.resolvedMutex.RUnlock()
|
|
if resolved == nil {
|
|
t.Error("expected alert to be added to recently resolved")
|
|
}
|
|
select {
|
|
case <-resolvedCh:
|
|
case <-time.After(2 * time.Second):
|
|
t.Error("expected resolved callback to be called")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCheckNodeOffline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("override DisableConnectivity clears alert and returns", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"node1": {DisableConnectivity: true},
|
|
}
|
|
m.activeAlerts["node-offline-node1"] = &Alert{ID: "node-offline-node1"}
|
|
m.nodeOfflineCount["node1"] = 5
|
|
m.mu.Unlock()
|
|
|
|
node := models.Node{ID: "node1", Name: "Node 1"}
|
|
m.checkNodeOffline(node)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["node-offline-node1"]
|
|
_, countExists := m.nodeOfflineCount["node1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared when connectivity disabled")
|
|
}
|
|
if countExists {
|
|
t.Error("expected offline count to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("existing alert updates LastSeen", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.activeAlerts["node-offline-node1"] = &Alert{
|
|
ID: "node-offline-node1",
|
|
StartTime: oldTime,
|
|
LastSeen: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
node := models.Node{ID: "node1", Name: "Node 1"}
|
|
m.checkNodeOffline(node)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["node-offline-node1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
if !alert.StartTime.Equal(oldTime) {
|
|
t.Error("expected StartTime to remain unchanged")
|
|
}
|
|
})
|
|
|
|
t.Run("insufficient confirmations waits", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
node := models.Node{ID: "node1", Name: "Node 1", Instance: "pve1"}
|
|
|
|
// First call - count 1
|
|
m.checkNodeOffline(node)
|
|
m.mu.RLock()
|
|
count1 := m.nodeOfflineCount["node1"]
|
|
alert1 := m.activeAlerts["node-offline-node1"]
|
|
m.mu.RUnlock()
|
|
if count1 != 1 {
|
|
t.Errorf("expected count 1, got %d", count1)
|
|
}
|
|
if alert1 != nil {
|
|
t.Error("expected no alert after 1 confirmation")
|
|
}
|
|
|
|
// Second call - count 2
|
|
m.checkNodeOffline(node)
|
|
m.mu.RLock()
|
|
count2 := m.nodeOfflineCount["node1"]
|
|
alert2 := m.activeAlerts["node-offline-node1"]
|
|
m.mu.RUnlock()
|
|
if count2 != 2 {
|
|
t.Errorf("expected count 2, got %d", count2)
|
|
}
|
|
if alert2 != nil {
|
|
t.Error("expected no alert after 2 confirmations")
|
|
}
|
|
})
|
|
|
|
t.Run("creates alert after 3 confirmations", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.nodeOfflineCount["node1"] = 2 // Pre-set to trigger on next call
|
|
m.mu.Unlock()
|
|
|
|
node := models.Node{
|
|
ID: "node1",
|
|
Name: "Node 1",
|
|
Instance: "pve1",
|
|
Status: "offline",
|
|
ConnectionHealth: "disconnected",
|
|
}
|
|
|
|
m.checkNodeOffline(node)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["node-offline-node1"]
|
|
count := m.nodeOfflineCount["node1"]
|
|
m.mu.RUnlock()
|
|
|
|
if count != 3 {
|
|
t.Errorf("expected count 3, got %d", count)
|
|
}
|
|
if alert == nil {
|
|
t.Fatal("expected alert after 3 confirmations")
|
|
}
|
|
if alert.Type != "connectivity" {
|
|
t.Errorf("expected type connectivity, got %s", alert.Type)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
if alert.ResourceID != "node1" {
|
|
t.Errorf("expected resourceID node1, got %s", alert.ResourceID)
|
|
}
|
|
})
|
|
|
|
t.Run("alert added to history", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.nodeOfflineCount["node1"] = 2
|
|
m.mu.Unlock()
|
|
|
|
node := models.Node{ID: "node1", Name: "Node 1", Instance: "pve1"}
|
|
m.checkNodeOffline(node)
|
|
|
|
// Check history
|
|
history := m.GetAlertHistory(10)
|
|
found := false
|
|
for _, h := range history {
|
|
if h.ID == "node-offline-node1" {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
t.Error("expected alert to be added to history")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCheckPBSOffline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("override Disabled clears alert and returns", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"pbs1": {Disabled: true},
|
|
}
|
|
m.activeAlerts["pbs-offline-pbs1"] = &Alert{ID: "pbs-offline-pbs1"}
|
|
m.mu.Unlock()
|
|
|
|
pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1"}
|
|
m.checkPBSOffline(pbs)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["pbs-offline-pbs1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared when disabled")
|
|
}
|
|
})
|
|
|
|
t.Run("override DisableConnectivity clears alert and returns", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"pbs1": {DisableConnectivity: true},
|
|
}
|
|
m.activeAlerts["pbs-offline-pbs1"] = &Alert{ID: "pbs-offline-pbs1"}
|
|
m.mu.Unlock()
|
|
|
|
pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1"}
|
|
m.checkPBSOffline(pbs)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["pbs-offline-pbs1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared when connectivity disabled")
|
|
}
|
|
})
|
|
|
|
t.Run("insufficient confirmations waits", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1"}
|
|
|
|
// First two calls - not enough confirmations
|
|
m.checkPBSOffline(pbs)
|
|
m.checkPBSOffline(pbs)
|
|
|
|
m.mu.RLock()
|
|
count := m.offlineConfirmations["pbs1"]
|
|
_, alertExists := m.activeAlerts["pbs-offline-pbs1"]
|
|
m.mu.RUnlock()
|
|
|
|
if count != 2 {
|
|
t.Errorf("expected count 2, got %d", count)
|
|
}
|
|
if alertExists {
|
|
t.Error("expected no alert after 2 confirmations")
|
|
}
|
|
})
|
|
|
|
t.Run("creates alert after 3 confirmations", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["pbs1"] = 2
|
|
m.mu.Unlock()
|
|
|
|
pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1", Host: "pbs.local"}
|
|
m.checkPBSOffline(pbs)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pbs-offline-pbs1"]
|
|
count := m.offlineConfirmations["pbs1"]
|
|
m.mu.RUnlock()
|
|
|
|
if count != 3 {
|
|
t.Errorf("expected count 3, got %d", count)
|
|
}
|
|
if alert == nil {
|
|
t.Fatal("expected alert after 3 confirmations")
|
|
}
|
|
if alert.Type != "offline" {
|
|
t.Errorf("expected type offline, got %s", alert.Type)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("existing alert updates LastSeen", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["pbs1"] = 3
|
|
m.activeAlerts["pbs-offline-pbs1"] = &Alert{
|
|
ID: "pbs-offline-pbs1",
|
|
LastSeen: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pbs := models.PBSInstance{ID: "pbs1", Name: "PBS 1"}
|
|
m.checkPBSOffline(pbs)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pbs-offline-pbs1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCheckPMGOffline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("override Disabled clears alert and returns", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"pmg1": {Disabled: true},
|
|
}
|
|
m.activeAlerts["pmg-offline-pmg1"] = &Alert{ID: "pmg-offline-pmg1"}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1"}
|
|
m.checkPMGOffline(pmg)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["pmg-offline-pmg1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared when disabled")
|
|
}
|
|
})
|
|
|
|
t.Run("override DisableConnectivity clears alert and returns", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"pmg1": {DisableConnectivity: true},
|
|
}
|
|
m.activeAlerts["pmg-offline-pmg1"] = &Alert{ID: "pmg-offline-pmg1"}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1"}
|
|
m.checkPMGOffline(pmg)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["pmg-offline-pmg1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared when connectivity disabled")
|
|
}
|
|
})
|
|
|
|
t.Run("insufficient confirmations waits", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1"}
|
|
|
|
// First two calls - not enough confirmations
|
|
m.checkPMGOffline(pmg)
|
|
m.checkPMGOffline(pmg)
|
|
|
|
m.mu.RLock()
|
|
count := m.offlineConfirmations["pmg1"]
|
|
_, alertExists := m.activeAlerts["pmg-offline-pmg1"]
|
|
m.mu.RUnlock()
|
|
|
|
if count != 2 {
|
|
t.Errorf("expected count 2, got %d", count)
|
|
}
|
|
if alertExists {
|
|
t.Error("expected no alert after 2 confirmations")
|
|
}
|
|
})
|
|
|
|
t.Run("creates alert after 3 confirmations", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["pmg1"] = 2
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1", Host: "pmg.local"}
|
|
m.checkPMGOffline(pmg)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg-offline-pmg1"]
|
|
count := m.offlineConfirmations["pmg1"]
|
|
m.mu.RUnlock()
|
|
|
|
if count != 3 {
|
|
t.Errorf("expected count 3, got %d", count)
|
|
}
|
|
if alert == nil {
|
|
t.Fatal("expected alert after 3 confirmations")
|
|
}
|
|
if alert.Type != "offline" {
|
|
t.Errorf("expected type offline, got %s", alert.Type)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("existing alert updates LastSeen", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["pmg1"] = 3
|
|
m.activeAlerts["pmg-offline-pmg1"] = &Alert{
|
|
ID: "pmg-offline-pmg1",
|
|
LastSeen: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1"}
|
|
m.checkPMGOffline(pmg)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg-offline-pmg1"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
|
|
}
|
|
|
|
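// calculateTrimmedBaseline, as exercised by the cases below (a summary
// inferred from the test expectations rather than from the implementation):
// fewer than 12 samples yields (0, false); 12-23 samples yields the simple
// mean; 24+ samples are sorted, the 2 lowest and 2 highest are dropped, and
// the trimmed mean is returned unless it differs from the median by more
// than 40%, in which case the median is used instead.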
func TestCalculateTrimmedBaseline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("less than 12 samples returns untrustworthy", func(t *testing.T) {
|
|
// t.Parallel()
|
|
samples := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
|
|
baseline, trustworthy := calculateTrimmedBaseline(samples)
|
|
if trustworthy {
|
|
t.Error("expected untrustworthy with less than 12 samples")
|
|
}
|
|
if baseline != 0 {
|
|
t.Errorf("expected baseline 0, got %f", baseline)
|
|
}
|
|
})
|
|
|
|
t.Run("empty samples returns untrustworthy", func(t *testing.T) {
|
|
// t.Parallel()
|
|
samples := []float64{}
|
|
baseline, trustworthy := calculateTrimmedBaseline(samples)
|
|
if trustworthy {
|
|
t.Error("expected untrustworthy with empty samples")
|
|
}
|
|
if baseline != 0 {
|
|
t.Errorf("expected baseline 0, got %f", baseline)
|
|
}
|
|
})
|
|
|
|
t.Run("12-23 samples uses simple mean", func(t *testing.T) {
|
|
// t.Parallel()
|
|
// 12 samples summing to 78
|
|
samples := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
|
|
baseline, trustworthy := calculateTrimmedBaseline(samples)
|
|
if !trustworthy {
|
|
t.Error("expected trustworthy with 12 samples")
|
|
}
|
|
// Mean of 1-12 is (1+2+...+12)/12 = 78/12 = 6.5
|
|
if baseline != 6.5 {
|
|
t.Errorf("expected baseline 6.5, got %f", baseline)
|
|
}
|
|
})
|
|
|
|
t.Run("24+ samples uses trimmed mean", func(t *testing.T) {
|
|
// t.Parallel()
|
|
// 24 identical values - trimmed mean should equal value
|
|
samples := make([]float64, 24)
|
|
for i := range samples {
|
|
samples[i] = 10.0
|
|
}
|
|
baseline, trustworthy := calculateTrimmedBaseline(samples)
|
|
if !trustworthy {
|
|
t.Error("expected trustworthy with 24 samples")
|
|
}
|
|
if baseline != 10.0 {
|
|
t.Errorf("expected baseline 10.0, got %f", baseline)
|
|
}
|
|
})
|
|
|
|
t.Run("24+ samples falls back to median when diff > 40%", func(t *testing.T) {
|
|
// t.Parallel()
|
|
// Create samples where trimmed mean differs significantly from median
|
|
// Mostly 10s with some extreme outliers that survive trimming
|
|
samples := make([]float64, 24)
|
|
for i := range samples {
|
|
if i < 4 {
|
|
samples[i] = 100.0 // Extreme high values
|
|
} else {
|
|
samples[i] = 10.0 // Normal values
|
|
}
|
|
}
|
|
// After sorting: 10,10,...,10,100,100,100,100
|
|
// Median is 10 (middle values are 10s)
|
|
// Trimmed mean (drop 2 highest and 2 lowest): still has 2 100s
|
|
// So trimmed mean > median * 1.4, should fall back to median
|
|
baseline, trustworthy := calculateTrimmedBaseline(samples)
|
|
if !trustworthy {
|
|
t.Error("expected trustworthy")
|
|
}
|
|
// Should use median (10) due to large diff
|
|
if baseline != 10.0 {
|
|
t.Errorf("expected baseline 10.0 (median fallback), got %f", baseline)
|
|
}
|
|
})
|
|
|
|
t.Run("24+ samples uses trimmed mean when diff <= 40%", func(t *testing.T) {
|
|
// t.Parallel()
|
|
// Sequential values with minimal outlier effect
|
|
samples := make([]float64, 24)
|
|
for i := range samples {
|
|
samples[i] = float64(i + 1) // 1,2,3,...,24
|
|
}
|
|
baseline, trustworthy := calculateTrimmedBaseline(samples)
|
|
if !trustworthy {
|
|
t.Error("expected trustworthy")
|
|
}
|
|
// Median of 1-24 is (12+13)/2 = 12.5
|
|
// Trimmed mean of 3-22 is (3+4+...+22)/20 = 250/20 = 12.5
|
|
// Both are close, should use trimmed mean
|
|
if baseline != 12.5 {
|
|
t.Errorf("expected baseline 12.5, got %f", baseline)
|
|
}
|
|
})
|
|
|
|
t.Run("odd length array uses middle element for median", func(t *testing.T) {
|
|
// t.Parallel()
|
|
// 25 samples: an odd-length array
|
|
samples := make([]float64, 25)
|
|
for i := range samples {
|
|
samples[i] = float64(i + 1) // 1,2,3,...,25
|
|
}
|
|
baseline, trustworthy := calculateTrimmedBaseline(samples)
|
|
if !trustworthy {
|
|
t.Error("expected trustworthy")
|
|
}
|
|
// Median of sorted 1-25 is the 13th element = 13
|
|
// Trimmed mean excludes top/bottom 2: 3..23 = 21 elements, sum = (3+23)*21/2 = 273, mean = 13
|
|
// Both are 13, diff is 0%, should use trimmed mean = 13
|
|
if baseline != 13.0 {
|
|
t.Errorf("expected baseline 13.0, got %f", baseline)
|
|
}
|
|
})
|
|
|
|
t.Run("trimmed mean less than median triggers diff calculation", func(t *testing.T) {
|
|
// t.Parallel()
|
|
// Create samples where trimmed mean < median but within 40%
|
|
// High outliers at top (excluded by trim), low values in middle
|
|
samples := make([]float64, 24)
|
|
// First 2 (will be trimmed): very low
|
|
samples[0], samples[1] = 1, 2
|
|
// Middle 20: mostly 50 but some variance
|
|
for i := 2; i < 22; i++ {
|
|
samples[i] = 50.0
|
|
}
|
|
// Last 2 (will be trimmed): very high
|
|
samples[22], samples[23] = 100, 200
|
|
|
|
baseline, trustworthy := calculateTrimmedBaseline(samples)
|
|
if !trustworthy {
|
|
t.Error("expected trustworthy")
|
|
}
|
|
// After sorting: 1, 2, 50x20, 100, 200
|
|
// Median of even array: (50+50)/2 = 50
|
|
// Trimmed mean: 50x20/20 = 50
|
|
// Should return 50
|
|
if baseline != 50.0 {
|
|
t.Errorf("expected baseline 50.0, got %f", baseline)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCreateOrUpdateNodeAlert(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("creates new alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1"}
|
|
m.createOrUpdateNodeAlert(
|
|
"pmg1-node-queue",
|
|
pmg,
|
|
"mail-node1",
|
|
"pmg-node-queue",
|
|
AlertLevelWarning,
|
|
100,
|
|
50,
|
|
"Queue depth high",
|
|
)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node-queue"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created")
|
|
}
|
|
if alert.Type != "pmg-node-queue" {
|
|
t.Errorf("expected type pmg-node-queue, got %s", alert.Type)
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.Value != 100 {
|
|
t.Errorf("expected value 100, got %f", alert.Value)
|
|
}
|
|
if alert.Threshold != 50 {
|
|
t.Errorf("expected threshold 50, got %f", alert.Threshold)
|
|
}
|
|
if alert.Node != "mail-node1" {
|
|
t.Errorf("expected node mail-node1, got %s", alert.Node)
|
|
}
|
|
})
|
|
|
|
t.Run("updates existing alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-node-queue"] = &Alert{
|
|
ID: "pmg1-node-queue",
|
|
Value: 50,
|
|
Threshold: 40,
|
|
Level: AlertLevelWarning,
|
|
Message: "Old message",
|
|
LastSeen: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{ID: "pmg1", Name: "PMG 1"}
|
|
m.createOrUpdateNodeAlert(
|
|
"pmg1-node-queue",
|
|
pmg,
|
|
"mail-node1",
|
|
"pmg-node-queue",
|
|
AlertLevelCritical,
|
|
200,
|
|
100,
|
|
"New message",
|
|
)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node-queue"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if alert.Value != 200 {
|
|
t.Errorf("expected value 200, got %f", alert.Value)
|
|
}
|
|
if alert.Threshold != 100 {
|
|
t.Errorf("expected threshold 100, got %f", alert.Threshold)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
if alert.Message != "New message" {
|
|
t.Errorf("expected 'New message', got %s", alert.Message)
|
|
}
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCheckPMGQueueDepths(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("no thresholds configured does not create alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 1000, Deferred: 500, Hold: 300}},
|
|
},
|
|
}
|
|
|
|
// No thresholds configured (all 0)
|
|
defaults := PMGThresholdConfig{}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
totalAlerts := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
|
|
if totalAlerts != 0 {
|
|
t.Errorf("expected no alerts when no thresholds configured, got %d", totalAlerts)
|
|
}
|
|
})
|
|
|
|
t.Run("total queue warning alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Host: "pmg-server",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 300}},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{Total: 250}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 500,
|
|
QueueTotalCritical: 1000,
|
|
}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected warning alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.Value != 550 {
|
|
t.Errorf("expected value 550, got %f", alert.Value)
|
|
}
|
|
})
|
|
|
|
t.Run("total queue critical alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 600}},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{Total: 500}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 500,
|
|
QueueTotalCritical: 1000,
|
|
}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected critical alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
if alert.Value != 1100 {
|
|
t.Errorf("expected value 1100, got %f", alert.Value)
|
|
}
|
|
})
|
|
|
|
t.Run("deferred queue warning alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Deferred: 150}},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{Deferred: 100}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
DeferredQueueWarn: 200,
|
|
DeferredQueueCritical: 500,
|
|
}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-queue-deferred"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected deferred alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.Value != 250 {
|
|
t.Errorf("expected value 250, got %f", alert.Value)
|
|
}
|
|
})
|
|
|
|
t.Run("deferred queue critical alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Deferred: 300}},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{Deferred: 250}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
DeferredQueueWarn: 200,
|
|
DeferredQueueCritical: 500,
|
|
}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-queue-deferred"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected critical alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("hold queue warning alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Hold: 75}},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{Hold: 50}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
HoldQueueWarn: 100,
|
|
HoldQueueCritical: 300,
|
|
}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-queue-hold"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected hold alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.Value != 125 {
|
|
t.Errorf("expected value 125, got %f", alert.Value)
|
|
}
|
|
})
|
|
|
|
t.Run("hold queue critical alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Hold: 200}},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{Hold: 150}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
HoldQueueWarn: 100,
|
|
HoldQueueCritical: 300,
|
|
}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-queue-hold"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected critical alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("updates existing alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-queue-total"] = &Alert{
|
|
ID: "pmg1-queue-total",
|
|
Value: 400,
|
|
Level: AlertLevelWarning,
|
|
LastSeen: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 1200}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 500,
|
|
QueueTotalCritical: 1000,
|
|
}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if alert.Value != 1200 {
|
|
t.Errorf("expected value 1200, got %f", alert.Value)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
|
|
|
|
t.Run("below threshold clears alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-queue-total"] = &Alert{ID: "pmg1-queue-total"}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 100}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 500,
|
|
QueueTotalCritical: 1000,
|
|
}
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["pmg1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected alert to be cleared when below threshold")
|
|
}
|
|
})
|
|
|
|
t.Run("nil QueueStatus is handled", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: nil},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{Total: 100}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 500,
|
|
}
|
|
// Should not panic
|
|
m.checkPMGQueueDepths(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["pmg1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected no alert with total below threshold")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCheckPMGOldestMessage(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("no thresholds configured returns early", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 7200}}, // 2 hours
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{} // No thresholds
|
|
m.checkPMGOldestMessage(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["pmg1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected no alert when no thresholds configured")
|
|
}
|
|
})
|
|
|
|
t.Run("no messages clears existing alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-oldest-message"] = &Alert{ID: "pmg1-oldest-message"}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 0}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
OldestMessageWarnMins: 30,
|
|
OldestMessageCritMins: 60,
|
|
}
|
|
m.checkPMGOldestMessage(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["pmg1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected alert to be cleared when no messages in queue")
|
|
}
|
|
})
|
|
|
|
t.Run("warning alert when message age exceeds warning threshold", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Host: "pmg-server",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 2400}}, // 40 minutes
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
OldestMessageWarnMins: 30,
|
|
OldestMessageCritMins: 60,
|
|
}
|
|
m.checkPMGOldestMessage(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected warning alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.Value != 40 {
|
|
t.Errorf("expected value 40 minutes, got %f", alert.Value)
|
|
}
|
|
if alert.Threshold != 30 {
|
|
t.Errorf("expected threshold 30, got %f", alert.Threshold)
|
|
}
|
|
})
|
|
|
|
t.Run("critical alert when message age exceeds critical threshold", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 4200}}, // 70 minutes
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
OldestMessageWarnMins: 30,
|
|
OldestMessageCritMins: 60,
|
|
}
|
|
m.checkPMGOldestMessage(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected critical alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
if alert.Threshold != 60 {
|
|
t.Errorf("expected threshold 60, got %f", alert.Threshold)
|
|
}
|
|
})
|
|
|
|
t.Run("below threshold clears alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-oldest-message"] = &Alert{ID: "pmg1-oldest-message"}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 900}}, // 15 minutes
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
OldestMessageWarnMins: 30,
|
|
OldestMessageCritMins: 60,
|
|
}
|
|
m.checkPMGOldestMessage(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["pmg1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected alert to be cleared when below threshold")
|
|
}
|
|
})
|
|
|
|
t.Run("finds oldest across multiple nodes", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 1200}}, // 20 minutes
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{OldestAge: 3000}}, // 50 minutes (oldest)
|
|
{Name: "node3", QueueStatus: &models.PMGQueueStatus{OldestAge: 600}}, // 10 minutes
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
OldestMessageWarnMins: 30,
|
|
OldestMessageCritMins: 60,
|
|
}
|
|
m.checkPMGOldestMessage(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created")
|
|
}
|
|
if alert.Value != 50 {
|
|
t.Errorf("expected value 50 (oldest across nodes), got %f", alert.Value)
|
|
}
|
|
})
|
|
|
|
t.Run("updates existing alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-oldest-message"] = &Alert{
|
|
ID: "pmg1-oldest-message",
|
|
Value: 40,
|
|
Level: AlertLevelWarning,
|
|
LastSeen: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 4800}}, // 80 minutes
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
OldestMessageWarnMins: 30,
|
|
OldestMessageCritMins: 60,
|
|
}
|
|
m.checkPMGOldestMessage(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if alert.Value != 80 {
|
|
t.Errorf("expected value 80, got %f", alert.Value)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
|
|
|
|
t.Run("nil QueueStatus is handled", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: nil},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{OldestAge: 2400}}, // 40 minutes
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
OldestMessageWarnMins: 30,
|
|
}
|
|
// Should not panic and should use the valid node's data
|
|
m.checkPMGOldestMessage(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created from valid node data")
|
|
}
|
|
if alert.Value != 40 {
|
|
t.Errorf("expected value 40, got %f", alert.Value)
|
|
}
|
|
})
|
|
}
|
|
|
|
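// checkStorageOffline, per the cases below, requires two consecutive offline
// polls before raising a warning-level "offline" alert, and it clears (and
// never creates) the alert when a per-storage Disabled override is set.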
func TestCheckStorageOffline(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("first poll increments confirmation but does not create alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-lvm",
|
|
Name: "Local LVM",
|
|
Node: "pve-node1",
|
|
}
|
|
|
|
m.checkStorageOffline(storage)
|
|
|
|
m.mu.RLock()
|
|
confirmCount := m.offlineConfirmations["local-lvm"]
|
|
_, alertExists := m.activeAlerts["storage-offline-local-lvm"]
|
|
m.mu.RUnlock()
|
|
|
|
if confirmCount != 1 {
|
|
t.Errorf("expected confirmation count 1, got %d", confirmCount)
|
|
}
|
|
if alertExists {
|
|
t.Error("expected no alert on first poll")
|
|
}
|
|
})
|
|
|
|
t.Run("second poll creates alert after confirmation", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-lvm",
|
|
Name: "Local LVM",
|
|
Node: "pve-node1",
|
|
Instance: "pve-instance",
|
|
}
|
|
|
|
// First poll - confirmation
|
|
m.checkStorageOffline(storage)
|
|
// Second poll - should create alert
|
|
m.checkStorageOffline(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["storage-offline-local-lvm"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created after second poll")
|
|
}
|
|
if alert.Type != "offline" {
|
|
t.Errorf("expected type 'offline', got %s", alert.Type)
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.ResourceID != "local-lvm" {
|
|
t.Errorf("expected resource ID 'local-lvm', got %s", alert.ResourceID)
|
|
}
|
|
if alert.Node != "pve-node1" {
|
|
t.Errorf("expected node 'pve-node1', got %s", alert.Node)
|
|
}
|
|
})
|
|
|
|
t.Run("existing alert updates LastSeen", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.offlineConfirmations["local-lvm"] = 5 // Already confirmed
|
|
m.activeAlerts["storage-offline-local-lvm"] = &Alert{
|
|
ID: "storage-offline-local-lvm",
|
|
LastSeen: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
storage := models.Storage{
|
|
ID: "local-lvm",
|
|
Name: "Local LVM",
|
|
Node: "pve-node1",
|
|
}
|
|
|
|
m.checkStorageOffline(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["storage-offline-local-lvm"]
|
|
m.mu.RUnlock()
|
|
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
|
|
|
|
t.Run("disabled storage clears existing alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Pre-create an alert
|
|
m.mu.Lock()
|
|
m.activeAlerts["storage-offline-local-lvm"] = &Alert{ID: "storage-offline-local-lvm"}
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"local-lvm": {Disabled: true},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
storage := models.Storage{
|
|
ID: "local-lvm",
|
|
Name: "Local LVM",
|
|
Node: "pve-node1",
|
|
}
|
|
|
|
m.checkStorageOffline(storage)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["storage-offline-local-lvm"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected alert to be cleared when storage is disabled")
|
|
}
|
|
})
|
|
|
|
t.Run("disabled storage does not create alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"local-lvm": {Disabled: true},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
storage := models.Storage{
|
|
ID: "local-lvm",
|
|
Name: "Local LVM",
|
|
Node: "pve-node1",
|
|
}
|
|
|
|
// Multiple polls should not create alert
|
|
m.checkStorageOffline(storage)
|
|
m.checkStorageOffline(storage)
|
|
m.checkStorageOffline(storage)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["storage-offline-local-lvm"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected no alert when storage is disabled")
|
|
}
|
|
})
|
|
}
|
|
|
|
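// checkGuestPoweredOff, per the cases below, also requires two consecutive
// confirmations before raising a "powered-off" alert (warning by default,
// overridable via PoweredOffSeverity), records a monitorOnly metadata flag,
// and clears any alert when Disabled or DisableConnectivity overrides apply.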
func TestCheckGuestPoweredOff(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("first poll increments confirmation but does not create alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
|
|
m.mu.RLock()
|
|
confirmCount := m.offlineConfirmations["vm100"]
|
|
_, alertExists := m.activeAlerts["guest-powered-off-vm100"]
|
|
m.mu.RUnlock()
|
|
|
|
if confirmCount != 1 {
|
|
t.Errorf("expected confirmation count 1, got %d", confirmCount)
|
|
}
|
|
if alertExists {
|
|
t.Error("expected no alert on first poll")
|
|
}
|
|
})
|
|
|
|
t.Run("second poll creates alert after confirmation", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// First poll - confirmation
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
// Second poll - should create alert
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["guest-powered-off-vm100"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created after second poll")
|
|
}
|
|
if alert.Type != "powered-off" {
|
|
t.Errorf("expected type 'powered-off', got %s", alert.Type)
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level (default severity), got %s", alert.Level)
|
|
}
|
|
if alert.ResourceID != "vm100" {
|
|
t.Errorf("expected resource ID 'vm100', got %s", alert.ResourceID)
|
|
}
|
|
})
|
|
|
|
t.Run("existing alert updates LastSeen and level", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.activeAlerts["guest-powered-off-vm100"] = &Alert{
|
|
ID: "guest-powered-off-vm100",
|
|
LastSeen: oldTime,
|
|
Level: AlertLevelWarning,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["guest-powered-off-vm100"]
|
|
m.mu.RUnlock()
|
|
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
|
|
|
|
t.Run("monitorOnly flag is set in metadata", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// First poll
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", true)
|
|
// Second poll - creates alert
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", true)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["guest-powered-off-vm100"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created")
|
|
}
|
|
if alert.Metadata == nil {
|
|
t.Fatal("expected metadata to be set")
|
|
}
|
|
if monitorOnly, ok := alert.Metadata["monitorOnly"].(bool); !ok || !monitorOnly {
|
|
t.Error("expected monitorOnly to be true")
|
|
}
|
|
})
|
|
|
|
t.Run("disabled guest clears existing alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Pre-create an alert and confirmation count
|
|
m.mu.Lock()
|
|
m.activeAlerts["guest-powered-off-vm100"] = &Alert{ID: "guest-powered-off-vm100"}
|
|
m.offlineConfirmations["vm100"] = 5
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"vm100": {Disabled: true},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
|
|
m.mu.RLock()
|
|
_, alertExists := m.activeAlerts["guest-powered-off-vm100"]
|
|
_, confirmExists := m.offlineConfirmations["vm100"]
|
|
m.mu.RUnlock()
|
|
|
|
if alertExists {
|
|
t.Error("expected alert to be cleared when guest is disabled")
|
|
}
|
|
if confirmExists {
|
|
t.Error("expected confirmation count to be cleared")
|
|
}
|
|
})
|
|
|
|
t.Run("disableConnectivity clears existing alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Pre-create an alert
|
|
m.mu.Lock()
|
|
m.activeAlerts["guest-powered-off-vm100"] = &Alert{ID: "guest-powered-off-vm100"}
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"vm100": {DisableConnectivity: true},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["guest-powered-off-vm100"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected alert to be cleared when connectivity is disabled")
|
|
}
|
|
})
|
|
|
|
t.Run("uses override severity when configured", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.config.Overrides = map[string]ThresholdConfig{
|
|
"vm100": {PoweredOffSeverity: AlertLevelCritical},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// First poll
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
// Second poll
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["guest-powered-off-vm100"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level from override, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("uses default severity when no override", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.config.GuestDefaults.PoweredOffSeverity = AlertLevelCritical
|
|
m.mu.Unlock()
|
|
|
|
// First poll
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
// Second poll
|
|
m.checkGuestPoweredOff("vm100", "TestVM", "pve-node1", "pve-instance", "VM", false)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["guest-powered-off-vm100"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level from defaults, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("container type in message", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// First poll
|
|
m.checkGuestPoweredOff("ct200", "TestContainer", "pve-node1", "pve-instance", "Container", false)
|
|
// Second poll
|
|
m.checkGuestPoweredOff("ct200", "TestContainer", "pve-node1", "pve-instance", "Container", false)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["guest-powered-off-ct200"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to be created")
|
|
}
|
|
if !strings.Contains(alert.Message, "Container") {
|
|
t.Errorf("expected message to contain 'Container', got %s", alert.Message)
|
|
}
|
|
if !strings.Contains(alert.Message, "TestContainer") {
|
|
t.Errorf("expected message to contain 'TestContainer', got %s", alert.Message)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCleanup(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("auto-acknowledges old alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-3 * time.Hour)
|
|
m.mu.Lock()
|
|
m.config.AutoAcknowledgeAfterHours = 2
|
|
m.activeAlerts["old-alert"] = &Alert{
|
|
ID: "old-alert",
|
|
StartTime: oldTime,
|
|
Acknowledged: false,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["old-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if !alert.Acknowledged {
|
|
t.Error("expected alert to be auto-acknowledged")
|
|
}
|
|
if alert.AckUser != "system-auto" {
|
|
t.Errorf("expected AckUser 'system-auto', got %s", alert.AckUser)
|
|
}
|
|
})
|
|
|
|
t.Run("removes old acknowledged alerts by TTL", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldAckTime := time.Now().Add(-10 * 24 * time.Hour) // 10 days ago
|
|
m.mu.Lock()
|
|
m.config.MaxAcknowledgedAgeDays = 7
|
|
m.activeAlerts["ack-alert"] = &Alert{
|
|
ID: "ack-alert",
|
|
Acknowledged: true,
|
|
AckTime: &oldAckTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["ack-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected acknowledged alert to be removed by TTL")
|
|
}
|
|
})
|
|
|
|
t.Run("removes old unacknowledged alerts by TTL", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-40 * 24 * time.Hour) // 40 days ago
|
|
m.mu.Lock()
|
|
m.config.MaxAlertAgeDays = 30
|
|
m.config.AutoAcknowledgeAfterHours = 0 // Disable auto-acknowledge to test TTL
|
|
m.activeAlerts["old-unack-alert"] = &Alert{
|
|
ID: "old-unack-alert",
|
|
StartTime: oldTime,
|
|
Acknowledged: false,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["old-unack-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected old unacknowledged alert to be removed by TTL")
|
|
}
|
|
})
|
|
|
|
t.Run("removes acknowledged alerts by maxAge fallback", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldAckTime := time.Now().Add(-2 * time.Hour)
|
|
m.mu.Lock()
|
|
m.activeAlerts["ack-fallback"] = &Alert{
|
|
ID: "ack-fallback",
|
|
Acknowledged: true,
|
|
AckTime: &oldAckTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["ack-fallback"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected acknowledged alert to be removed by maxAge fallback")
|
|
}
|
|
})
|
|
|
|
t.Run("keeps acknowledged alerts that are still active", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldAckTime := time.Now().Add(-2 * time.Hour)
|
|
recentSeen := time.Now().Add(-5 * time.Minute)
|
|
m.mu.Lock()
|
|
m.activeAlerts["ack-active"] = &Alert{
|
|
ID: "ack-active",
|
|
Acknowledged: true,
|
|
AckTime: &oldAckTime,
|
|
LastSeen: recentSeen,
|
|
StartTime: recentSeen,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["ack-active"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Error("expected acknowledged active alert to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up old recent alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-10 * time.Minute)
|
|
m.mu.Lock()
|
|
m.recentAlerts["recent-old"] = &Alert{
|
|
ID: "recent-old",
|
|
StartTime: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.recentAlerts["recent-old"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected old recent alert to be cleaned up")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up expired suppressions", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.suppressedUntil["suppressed-alert"] = time.Now().Add(-1 * time.Hour)
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.suppressedUntil["suppressed-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected expired suppression to be cleaned up")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up old rate limit entries", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.alertRateLimit["rate-limited"] = []time.Time{
|
|
time.Now().Add(-2 * time.Hour), // Old, should be removed
|
|
time.Now().Add(-30 * time.Minute), // Recent, should remain
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
times := m.alertRateLimit["rate-limited"]
|
|
m.mu.RUnlock()
|
|
|
|
if len(times) != 1 {
|
|
t.Errorf("expected 1 recent time, got %d", len(times))
|
|
}
|
|
})
|
|
|
|
t.Run("removes empty rate limit entries", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.alertRateLimit["all-old"] = []time.Time{
|
|
time.Now().Add(-2 * time.Hour),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.alertRateLimit["all-old"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected empty rate limit entry to be removed")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up old recently resolved alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.resolvedMutex.Lock()
|
|
m.recentlyResolved["old-resolved"] = &ResolvedAlert{
|
|
Alert: &Alert{ID: "old-resolved"},
|
|
ResolvedTime: time.Now().Add(-10 * time.Minute),
|
|
}
|
|
m.resolvedMutex.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.resolvedMutex.Lock()
|
|
_, exists := m.recentlyResolved["old-resolved"]
|
|
m.resolvedMutex.Unlock()
|
|
|
|
if exists {
|
|
t.Error("expected old recently resolved alert to be cleaned up")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up stale pending alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.pendingAlerts["stale-pending"] = time.Now().Add(-15 * time.Minute)
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.pendingAlerts["stale-pending"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected stale pending alert to be cleaned up")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up flapping history for inactive alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.flappingHistory["inactive-alert"] = []time.Time{
|
|
time.Now().Add(-30 * time.Minute),
|
|
}
|
|
m.flappingActive["inactive-alert"] = true
|
|
// No active alert, no suppression
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, historyExists := m.flappingHistory["inactive-alert"]
|
|
_, activeExists := m.flappingActive["inactive-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if historyExists {
|
|
t.Error("expected flapping history to be cleaned up")
|
|
}
|
|
if activeExists {
|
|
t.Error("expected flapping active flag to be cleaned up")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up stale Docker restart tracking", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.dockerRestartTracking["stale-container"] = &dockerRestartRecord{
|
|
lastChecked: time.Now().Add(-25 * time.Hour),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.dockerRestartTracking["stale-container"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected stale Docker restart tracking to be cleaned up")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up stale PMG anomaly trackers", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.pmgAnomalyTrackers["stale-pmg"] = &pmgAnomalyTracker{
|
|
LastSampleTime: time.Now().Add(-25 * time.Hour),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.pmgAnomalyTrackers["stale-pmg"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected stale PMG anomaly tracker to be cleaned up")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up empty PMG quarantine history", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.pmgQuarantineHistory["empty-pmg"] = []pmgQuarantineSnapshot{}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.pmgQuarantineHistory["empty-pmg"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected empty PMG quarantine history to be cleaned up")
|
|
}
|
|
})
|
|
|
|
t.Run("cleans up stale PMG quarantine history", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.pmgQuarantineHistory["stale-pmg"] = []pmgQuarantineSnapshot{
|
|
{Timestamp: time.Now().Add(-8 * 24 * time.Hour)},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.Cleanup(1 * time.Hour)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.pmgQuarantineHistory["stale-pmg"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected stale PMG quarantine history to be cleaned up")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestConvertLegacyThreshold(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("nil input returns nil", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
result := m.convertLegacyThreshold(nil)
|
|
|
|
if result != nil {
|
|
t.Error("expected nil result for nil input")
|
|
}
|
|
})
|
|
|
|
t.Run("zero value returns nil", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
zero := 0.0
|
|
result := m.convertLegacyThreshold(&zero)
|
|
|
|
if result != nil {
|
|
t.Error("expected nil result for zero value")
|
|
}
|
|
})
|
|
|
|
t.Run("negative value returns nil", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
neg := -5.0
|
|
result := m.convertLegacyThreshold(&neg)
|
|
|
|
if result != nil {
|
|
t.Error("expected nil result for negative value")
|
|
}
|
|
})
|
|
|
|
t.Run("positive value with default margin", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
threshold := 80.0
|
|
result := m.convertLegacyThreshold(&threshold)
|
|
|
|
if result == nil {
|
|
t.Fatal("expected non-nil result")
|
|
}
|
|
if result.Trigger != 80.0 {
|
|
t.Errorf("expected trigger 80.0, got %f", result.Trigger)
|
|
}
|
|
if result.Clear != 75.0 { // 80 - 5 (default margin)
|
|
t.Errorf("expected clear 75.0, got %f", result.Clear)
|
|
}
|
|
})
|
|
|
|
t.Run("positive value with custom margin", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.config.HysteresisMargin = 10.0
|
|
m.mu.Unlock()
|
|
|
|
threshold := 80.0
|
|
result := m.convertLegacyThreshold(&threshold)
|
|
|
|
if result == nil {
|
|
t.Fatal("expected non-nil result")
|
|
}
|
|
if result.Trigger != 80.0 {
|
|
t.Errorf("expected trigger 80.0, got %f", result.Trigger)
|
|
}
|
|
if result.Clear != 70.0 { // 80 - 10 (custom margin)
|
|
t.Errorf("expected clear 70.0, got %f", result.Clear)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCheckEscalations(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("does nothing when escalation is disabled", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-2 * time.Hour)
|
|
m.mu.Lock()
|
|
m.config.ActivationState = ActivationActive
|
|
m.config.Schedule.Escalation.Enabled = false
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"},
|
|
}
|
|
m.activeAlerts["test-alert"] = &Alert{
|
|
ID: "test-alert",
|
|
StartTime: oldTime,
|
|
LastEscalation: 0,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["test-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 0 {
|
|
t.Errorf("expected no escalation when disabled, got %d", alert.LastEscalation)
|
|
}
|
|
})
|
|
|
|
t.Run("does nothing when alerts are globally disabled", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-2 * time.Hour)
|
|
m.mu.Lock()
|
|
m.config.Enabled = false
|
|
m.config.ActivationState = ActivationActive
|
|
m.config.Schedule.Escalation.Enabled = true
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"},
|
|
}
|
|
m.activeAlerts["global-disabled-alert"] = &Alert{
|
|
ID: "global-disabled-alert",
|
|
StartTime: oldTime,
|
|
LastEscalation: 0,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["global-disabled-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 0 {
|
|
t.Errorf("expected no escalation when alerts are globally disabled, got %d", alert.LastEscalation)
|
|
}
|
|
})
|
|
|
|
t.Run("does nothing when activation state is pending", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-2 * time.Hour)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.ActivationState = ActivationPending
|
|
m.config.Schedule.Escalation.Enabled = true
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"},
|
|
}
|
|
m.activeAlerts["pending-alert"] = &Alert{
|
|
ID: "pending-alert",
|
|
StartTime: oldTime,
|
|
LastEscalation: 0,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pending-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 0 {
|
|
t.Errorf("expected no escalation when activation is pending, got %d", alert.LastEscalation)
|
|
}
|
|
})
|
|
|
|
t.Run("does nothing when activation state is snoozed", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-2 * time.Hour)
|
|
m.mu.Lock()
|
|
m.config.Enabled = true
|
|
m.config.ActivationState = ActivationSnoozed
|
|
m.config.Schedule.Escalation.Enabled = true
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"},
|
|
}
|
|
m.activeAlerts["snoozed-alert"] = &Alert{
|
|
ID: "snoozed-alert",
|
|
StartTime: oldTime,
|
|
LastEscalation: 0,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["snoozed-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 0 {
|
|
t.Errorf("expected no escalation when activation is snoozed, got %d", alert.LastEscalation)
|
|
}
|
|
})
|
|
|
|
t.Run("skips acknowledged alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-2 * time.Hour)
|
|
m.mu.Lock()
|
|
m.config.ActivationState = ActivationActive
|
|
m.config.Schedule.Escalation.Enabled = true
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"},
|
|
}
|
|
m.activeAlerts["ack-alert"] = &Alert{
|
|
ID: "ack-alert",
|
|
StartTime: oldTime,
|
|
LastEscalation: 0,
|
|
Acknowledged: true,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["ack-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 0 {
|
|
t.Error("expected no escalation for acknowledged alert")
|
|
}
|
|
})
|
|
|
|
t.Run("escalates alert after threshold time", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-45 * time.Minute) // 45 minutes ago
|
|
m.mu.Lock()
|
|
m.config.ActivationState = ActivationActive
|
|
m.config.Schedule.Escalation.Enabled = true
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"}, // 30 minutes
|
|
{After: 60, Notify: "webhook"}, // 60 minutes
|
|
}
|
|
m.activeAlerts["escalate-alert"] = &Alert{
|
|
ID: "escalate-alert",
|
|
StartTime: oldTime,
|
|
LastEscalation: 0,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["escalate-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 1 {
|
|
t.Errorf("expected escalation to level 1, got %d", alert.LastEscalation)
|
|
}
|
|
if len(alert.EscalationTimes) != 1 {
|
|
t.Errorf("expected 1 escalation time, got %d", len(alert.EscalationTimes))
|
|
}
|
|
})
|
|
|
|
t.Run("escalates to multiple levels", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-90 * time.Minute) // 90 minutes ago
|
|
m.mu.Lock()
|
|
m.config.ActivationState = ActivationActive
|
|
m.config.Schedule.Escalation.Enabled = true
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"}, // 30 minutes
|
|
{After: 60, Notify: "webhook"}, // 60 minutes
|
|
}
|
|
m.activeAlerts["multi-escalate"] = &Alert{
|
|
ID: "multi-escalate",
|
|
StartTime: oldTime,
|
|
LastEscalation: 0,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["multi-escalate"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 2 {
|
|
t.Errorf("expected escalation to level 2, got %d", alert.LastEscalation)
|
|
}
|
|
if len(alert.EscalationTimes) != 2 {
|
|
t.Errorf("expected 2 escalation times, got %d", len(alert.EscalationTimes))
|
|
}
|
|
})
|
|
|
|
t.Run("does not re-escalate already escalated level", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-45 * time.Minute)
|
|
m.mu.Lock()
|
|
m.config.ActivationState = ActivationActive
|
|
m.config.Schedule.Escalation.Enabled = true
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"},
|
|
}
|
|
m.activeAlerts["already-escalated"] = &Alert{
|
|
ID: "already-escalated",
|
|
StartTime: oldTime,
|
|
LastEscalation: 1,
|
|
EscalationTimes: []time.Time{time.Now().Add(-10 * time.Minute)},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["already-escalated"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 1 {
|
|
t.Errorf("expected escalation to remain at 1, got %d", alert.LastEscalation)
|
|
}
|
|
if len(alert.EscalationTimes) != 1 {
|
|
t.Errorf("expected 1 escalation time (unchanged), got %d", len(alert.EscalationTimes))
|
|
}
|
|
})
|
|
|
|
t.Run("does not escalate before threshold time", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
recentTime := time.Now().Add(-10 * time.Minute) // Only 10 minutes ago
|
|
m.mu.Lock()
|
|
m.config.ActivationState = ActivationActive
|
|
m.config.Schedule.Escalation.Enabled = true
|
|
m.config.Schedule.Escalation.Levels = []EscalationLevel{
|
|
{After: 30, Notify: "email"}, // 30 minutes threshold
|
|
}
|
|
m.activeAlerts["recent-alert"] = &Alert{
|
|
ID: "recent-alert",
|
|
StartTime: recentTime,
|
|
LastEscalation: 0,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
m.checkEscalations()
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["recent-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert.LastEscalation != 0 {
|
|
t.Error("expected no escalation for recent alert")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCleanupAlertsForNodes(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("removes alerts for non-existent nodes", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["alert-old-node"] = &Alert{
|
|
ID: "alert-old-node",
|
|
Node: "old-node",
|
|
}
|
|
m.activeAlerts["alert-valid-node"] = &Alert{
|
|
ID: "alert-valid-node",
|
|
Node: "valid-node",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
existingNodes := map[string]bool{
|
|
"valid-node": true,
|
|
}
|
|
|
|
m.CleanupAlertsForNodes(existingNodes)
|
|
|
|
// Give async save goroutine time to complete
|
|
time.Sleep(50 * time.Millisecond)
|
|
|
|
m.mu.RLock()
|
|
_, oldExists := m.activeAlerts["alert-old-node"]
|
|
_, validExists := m.activeAlerts["alert-valid-node"]
|
|
m.mu.RUnlock()
|
|
|
|
if oldExists {
|
|
t.Error("expected alert for old node to be removed")
|
|
}
|
|
if !validExists {
|
|
t.Error("expected alert for valid node to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("skips Docker alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["docker-container-state"] = &Alert{
|
|
ID: "docker-container-state",
|
|
ResourceID: "docker:host1:container1",
|
|
Node: "non-existent-node",
|
|
}
|
|
m.activeAlerts["alert-with-docker-resource"] = &Alert{
|
|
ID: "alert-with-docker-resource",
|
|
ResourceID: "docker:host2:container2",
|
|
Node: "non-existent-node",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
existingNodes := map[string]bool{}
|
|
|
|
m.CleanupAlertsForNodes(existingNodes)
|
|
|
|
m.mu.RLock()
|
|
_, dockerExists := m.activeAlerts["docker-container-state"]
|
|
_, dockerResourceExists := m.activeAlerts["alert-with-docker-resource"]
|
|
m.mu.RUnlock()
|
|
|
|
if !dockerExists {
|
|
t.Error("expected docker alert to be preserved")
|
|
}
|
|
if !dockerResourceExists {
|
|
t.Error("expected alert with docker resource to be preserved")
|
|
}
|
|
})
|
|
|
|
t.Run("skips PBS alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["pbs-offline-test"] = &Alert{
|
|
ID: "pbs-offline-test",
|
|
Node: "non-existent-node",
|
|
}
|
|
m.activeAlerts["pbs-backup-alert"] = &Alert{
|
|
ID: "pbs-backup-alert",
|
|
Type: "pbs-offline",
|
|
Node: "non-existent-node",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
existingNodes := map[string]bool{}
|
|
|
|
m.CleanupAlertsForNodes(existingNodes)
|
|
|
|
m.mu.RLock()
|
|
_, pbsExists := m.activeAlerts["pbs-offline-test"]
|
|
_, pbsTypeExists := m.activeAlerts["pbs-backup-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !pbsExists {
|
|
t.Error("expected pbs-prefixed alert to be preserved")
|
|
}
|
|
if !pbsTypeExists {
|
|
t.Error("expected pbs-offline type alert to be preserved")
|
|
}
|
|
})
|
|
|
|
t.Run("removes alerts with empty node", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["empty-node-alert"] = &Alert{
|
|
ID: "empty-node-alert",
|
|
Node: "",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
existingNodes := map[string]bool{
|
|
"valid-node": true,
|
|
}
|
|
|
|
m.CleanupAlertsForNodes(existingNodes)
|
|
|
|
time.Sleep(50 * time.Millisecond)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["empty-node-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected alert with empty node to be removed")
|
|
}
|
|
})
|
|
|
|
t.Run("handles nil alert in map", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["nil-alert"] = nil
|
|
m.activeAlerts["valid-alert"] = &Alert{
|
|
ID: "valid-alert",
|
|
Node: "valid-node",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
existingNodes := map[string]bool{
|
|
"valid-node": true,
|
|
}
|
|
|
|
// Should not panic
|
|
m.CleanupAlertsForNodes(existingNodes)
|
|
|
|
m.mu.RLock()
|
|
_, validExists := m.activeAlerts["valid-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !validExists {
|
|
t.Error("expected valid alert to remain")
|
|
}
|
|
})
|
|
|
|
t.Run("no cleanup needed logs correctly", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["valid-alert"] = &Alert{
|
|
ID: "valid-alert",
|
|
Node: "valid-node",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
existingNodes := map[string]bool{
|
|
"valid-node": true,
|
|
}
|
|
|
|
// Should not panic and should not remove any alerts
|
|
m.CleanupAlertsForNodes(existingNodes)
|
|
|
|
m.mu.RLock()
|
|
count := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
|
|
if count != 1 {
|
|
t.Errorf("expected 1 alert, got %d", count)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCheckZFSPoolHealth(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("nil ZFSPool returns early", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: nil,
|
|
}
|
|
|
|
// Should not panic
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
count := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
|
|
if count != 0 {
|
|
t.Errorf("expected no alerts for nil pool, got %d", count)
|
|
}
|
|
})
|
|
|
|
t.Run("ONLINE pool does not create state alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "ONLINE",
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["zfs-pool-state-local-zfs"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected no state alert for ONLINE pool")
|
|
}
|
|
})
|
|
|
|
t.Run("DEGRADED pool creates warning alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
Instance: "pve-instance",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "DEGRADED",
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["zfs-pool-state-local-zfs"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected state alert for DEGRADED pool")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.Type != "zfs-pool-state" {
|
|
t.Errorf("expected type 'zfs-pool-state', got %s", alert.Type)
|
|
}
|
|
})
|
|
|
|
t.Run("FAULTED pool creates critical alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "FAULTED",
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["zfs-pool-state-local-zfs"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected state alert for FAULTED pool")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("UNAVAIL pool creates critical alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "UNAVAIL",
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["zfs-pool-state-local-zfs"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected state alert for UNAVAIL pool")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("pool coming back ONLINE clears state alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Pre-create a state alert
|
|
m.mu.Lock()
|
|
m.activeAlerts["zfs-pool-state-local-zfs"] = &Alert{
|
|
ID: "zfs-pool-state-local-zfs",
|
|
Level: AlertLevelWarning,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "ONLINE",
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["zfs-pool-state-local-zfs"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected state alert to be cleared when pool is ONLINE")
|
|
}
|
|
})
|
|
|
|
t.Run("pool with errors creates error alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "ONLINE",
|
|
ReadErrors: 5,
|
|
WriteErrors: 2,
|
|
ChecksumErrors: 1,
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["zfs-pool-errors-local-zfs"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected errors alert for pool with errors")
|
|
}
|
|
if alert.Type != "zfs-pool-errors" {
|
|
t.Errorf("expected type 'zfs-pool-errors', got %s", alert.Type)
|
|
}
|
|
if alert.Value != 8 { // 5 + 2 + 1
|
|
t.Errorf("expected value 8, got %f", alert.Value)
|
|
}
|
|
})
|
|
|
|
t.Run("pool error count increase updates alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.activeAlerts["zfs-pool-errors-local-zfs"] = &Alert{
|
|
ID: "zfs-pool-errors-local-zfs",
|
|
Value: 5,
|
|
StartTime: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "ONLINE",
|
|
ReadErrors: 10,
|
|
WriteErrors: 0,
|
|
ChecksumErrors: 0,
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["zfs-pool-errors-local-zfs"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected errors alert to exist")
|
|
}
|
|
if alert.Value != 10 {
|
|
t.Errorf("expected value 10, got %f", alert.Value)
|
|
}
|
|
// Start time should be preserved
|
|
if !alert.StartTime.Equal(oldTime) {
|
|
t.Error("expected StartTime to be preserved on update")
|
|
}
|
|
})
|
|
|
|
t.Run("pool with no errors clears error alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["zfs-pool-errors-local-zfs"] = &Alert{
|
|
ID: "zfs-pool-errors-local-zfs",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "ONLINE",
|
|
ReadErrors: 0,
|
|
WriteErrors: 0,
|
|
ChecksumErrors: 0,
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["zfs-pool-errors-local-zfs"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected errors alert to be cleared when no errors")
|
|
}
|
|
})
|
|
|
|
t.Run("device with errors creates device alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "ONLINE",
|
|
Devices: []models.ZFSDevice{
|
|
{Name: "sda", State: "ONLINE", ReadErrors: 3, WriteErrors: 0, ChecksumErrors: 0},
|
|
},
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["zfs-device-local-zfs-sda"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected device alert for device with errors")
|
|
}
|
|
if alert.Type != "zfs-device" {
|
|
t.Errorf("expected type 'zfs-device', got %s", alert.Type)
|
|
}
|
|
})
|
|
|
|
t.Run("device in FAULTED state creates critical alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "DEGRADED",
|
|
Devices: []models.ZFSDevice{
|
|
{Name: "sda", State: "FAULTED"},
|
|
},
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["zfs-device-local-zfs-sda"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected device alert for FAULTED device")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level for FAULTED device, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("healthy device clears device alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts["zfs-device-local-zfs-sda"] = &Alert{
|
|
ID: "zfs-device-local-zfs-sda",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "ONLINE",
|
|
Devices: []models.ZFSDevice{
|
|
{Name: "sda", State: "ONLINE", ReadErrors: 0, WriteErrors: 0, ChecksumErrors: 0},
|
|
},
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["zfs-device-local-zfs-sda"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected device alert to be cleared for healthy device")
|
|
}
|
|
})
|
|
|
|
t.Run("SPARE device in normal state does not create alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
storage := models.Storage{
|
|
ID: "local-zfs",
|
|
Name: "Local ZFS",
|
|
Node: "pve-node1",
|
|
ZFSPool: &models.ZFSPool{
|
|
Name: "rpool",
|
|
State: "ONLINE",
|
|
Devices: []models.ZFSDevice{
|
|
{Name: "sdb", State: "SPARE", ReadErrors: 0, WriteErrors: 0, ChecksumErrors: 0},
|
|
},
|
|
},
|
|
}
|
|
|
|
m.checkZFSPoolHealth(storage)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["zfs-device-local-zfs-sdb"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected no alert for SPARE device without errors")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCheckPMGNodeQueues(t *testing.T) {
|
|
// t.Parallel()
|
|
|
|
t.Run("empty nodes returns early", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 100,
|
|
}
|
|
|
|
// Should not panic
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
count := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
|
|
if count != 0 {
|
|
t.Errorf("expected no alerts for empty nodes, got %d", count)
|
|
}
|
|
})
|
|
|
|
t.Run("nil QueueStatus is skipped", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: nil},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 100,
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
count := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
|
|
if count != 0 {
|
|
t.Errorf("expected no alerts for nil QueueStatus, got %d", count)
|
|
}
|
|
})
|
|
|
|
t.Run("total queue warning alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 80}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 100, // 60% scaled = 60
|
|
QueueTotalCritical: 200, // 80% scaled = 160
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected total queue warning alert")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.Value != 80 {
|
|
t.Errorf("expected value 80, got %f", alert.Value)
|
|
}
|
|
})
|
|
|
|
t.Run("total queue critical alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 200}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 100, // 60% scaled = 60
|
|
QueueTotalCritical: 200, // 80% scaled = 160
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected total queue critical alert")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("deferred queue warning alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Deferred: 40}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
DeferredQueueWarn: 50, // 60% scaled = 30
|
|
DeferredQueueCritical: 100,
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node1-queue-deferred"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected deferred queue warning alert")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("hold queue warning alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Hold: 25}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
HoldQueueWarn: 30, // 60% scaled = 18
|
|
HoldQueueCritical: 60,
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node1-queue-hold"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected hold queue warning alert")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("oldest message age warning alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{OldestAge: 2400}}, // 40 minutes
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
OldestMessageWarnMins: 50, // 60% scaled = 30 minutes
|
|
OldestMessageCritMins: 90,
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node1-oldest-message"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected oldest message warning alert")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("expected warning level, got %s", alert.Level)
|
|
}
|
|
if alert.Value != 40 { // 2400 seconds / 60 = 40 minutes
|
|
t.Errorf("expected value 40, got %f", alert.Value)
|
|
}
|
|
})
|
|
|
|
t.Run("below threshold clears alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
// Pre-create an alert
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-node1-queue-total"] = &Alert{ID: "pmg1-node1-queue-total"}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 10}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 100, // 60% scaled = 60
|
|
QueueTotalCritical: 200,
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["pmg1-node1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("expected alert to be cleared when below threshold")
|
|
}
|
|
})
|
|
|
|
t.Run("outlier detection adds note to message", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 10}},
|
|
{Name: "node2", QueueStatus: &models.PMGQueueStatus{Total: 10}},
|
|
{Name: "node3", QueueStatus: &models.PMGQueueStatus{Total: 100}}, // outlier
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 100, // 60% scaled = 60
|
|
QueueTotalCritical: 200,
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node3-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert for outlier node")
|
|
}
|
|
if !strings.Contains(alert.Message, "outlier") {
|
|
t.Errorf("expected message to contain 'outlier', got %s", alert.Message)
|
|
}
|
|
})
|
|
|
|
t.Run("no thresholds configured does not create alerts", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 1000, Deferred: 500, Hold: 300}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{} // All zero
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
count := len(m.activeAlerts)
|
|
m.mu.RUnlock()
|
|
|
|
if count != 0 {
|
|
t.Errorf("expected no alerts when no thresholds configured, got %d", count)
|
|
}
|
|
})
|
|
|
|
t.Run("updates existing alert", func(t *testing.T) {
|
|
// t.Parallel()
|
|
m := newTestManager(t)
|
|
|
|
oldTime := time.Now().Add(-1 * time.Hour)
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-node1-queue-total"] = &Alert{
|
|
ID: "pmg1-node1-queue-total",
|
|
Value: 60,
|
|
Level: AlertLevelWarning,
|
|
LastSeen: oldTime,
|
|
StartTime: oldTime,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "PMG 1",
|
|
Nodes: []models.PMGNodeStatus{
|
|
{Name: "node1", QueueStatus: &models.PMGQueueStatus{Total: 200}},
|
|
},
|
|
}
|
|
|
|
defaults := PMGThresholdConfig{
|
|
QueueTotalWarning: 100, // 60% scaled = 60
|
|
QueueTotalCritical: 200, // 80% scaled = 160
|
|
}
|
|
|
|
m.checkPMGNodeQueues(pmg, defaults)
|
|
|
|
m.mu.RLock()
|
|
alert := m.activeAlerts["pmg1-node1-queue-total"]
|
|
m.mu.RUnlock()
|
|
|
|
if alert == nil {
|
|
t.Fatal("expected alert to exist")
|
|
}
|
|
if alert.Value != 200 {
|
|
t.Errorf("expected value 200, got %f", alert.Value)
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("expected critical level, got %s", alert.Level)
|
|
}
|
|
if !alert.LastSeen.After(oldTime) {
|
|
t.Error("expected LastSeen to be updated")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestDockerContainerHealthAlert(t *testing.T) {
|
|
t.Run("healthy container - no alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-health-1",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-1",
|
|
Name: "healthy-app",
|
|
State: "running",
|
|
Status: "Up 10 minutes",
|
|
Health: "healthy",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-health-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected no health alert for healthy container")
|
|
}
|
|
})
|
|
|
|
t.Run("container with empty health - no alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-health-2",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-2",
|
|
Name: "no-health-check",
|
|
State: "running",
|
|
Status: "Up 10 minutes",
|
|
Health: "",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-health-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected no health alert for container with empty health")
|
|
}
|
|
})
|
|
|
|
t.Run("container with none health - no alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-health-3",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-3",
|
|
Name: "no-health-check",
|
|
State: "running",
|
|
Status: "Up 10 minutes",
|
|
Health: "none",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-health-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected no health alert for container with none health")
|
|
}
|
|
})
|
|
|
|
t.Run("container starting - no alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-health-4",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-4",
|
|
Name: "starting-app",
|
|
State: "running",
|
|
Status: "Up 5 seconds",
|
|
Health: "starting",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-health-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected no health alert for starting container")
|
|
}
|
|
})
|
|
|
|
t.Run("unhealthy container - critical alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-health-5",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-5",
|
|
Name: "unhealthy-app",
|
|
State: "running",
|
|
Status: "Up 10 minutes (unhealthy)",
|
|
Health: "unhealthy",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-health-%s", resourceID)
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatal("expected health alert for unhealthy container")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Fatalf("expected critical alert for unhealthy container, got %s", alert.Level)
|
|
}
|
|
if alert.Type != "docker-container-health" {
|
|
t.Fatalf("expected alert type docker-container-health, got %s", alert.Type)
|
|
}
|
|
})
|
|
|
|
t.Run("container with other health status - warning alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-health-6",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-6",
|
|
Name: "degraded-app",
|
|
State: "running",
|
|
Status: "Up 10 minutes",
|
|
Health: "degraded",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-health-%s", resourceID)
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatal("expected health alert for degraded container")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Fatalf("expected warning alert for non-unhealthy bad status, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("alert cleared when container becomes healthy", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
hostID := "host-health-7"
|
|
containerID := "container-7"
|
|
|
|
// First check with unhealthy container
|
|
hostUnhealthy := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "recovering-app",
|
|
State: "running",
|
|
Status: "Up 10 minutes (unhealthy)",
|
|
Health: "unhealthy",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(hostUnhealthy)
|
|
|
|
resourceID := dockerResourceID(hostID, containerID)
|
|
alertID := fmt.Sprintf("docker-container-health-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; !exists {
|
|
t.Fatal("expected health alert to be raised")
|
|
}
|
|
|
|
// Now container becomes healthy
|
|
hostHealthy := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "recovering-app",
|
|
State: "running",
|
|
Status: "Up 15 minutes",
|
|
Health: "healthy",
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(hostHealthy)
|
|
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected health alert to be cleared when container became healthy")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestDockerContainerOOMKillAlert(t *testing.T) {
|
|
t.Run("running container - no alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-oom-1",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-1",
|
|
Name: "running-app",
|
|
State: "running",
|
|
Status: "Up 10 minutes",
|
|
ExitCode: 0,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-oom-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected no OOM alert for running container")
|
|
}
|
|
})
|
|
|
|
t.Run("exited container with non-137 exit code - no alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-oom-2",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-2",
|
|
Name: "normal-exit-app",
|
|
State: "exited",
|
|
Status: "Exited (1) 5 minutes ago",
|
|
ExitCode: 1,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-oom-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected no OOM alert for container with exit code 1")
|
|
}
|
|
})
|
|
|
|
t.Run("exited container with exit code 137 - critical OOM alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-oom-3",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-3",
|
|
Name: "oom-killed-app",
|
|
State: "exited",
|
|
Status: "Exited (137) 1 minute ago",
|
|
ExitCode: 137,
|
|
MemoryUsage: 512 * 1024 * 1024,
|
|
MemoryLimit: 512 * 1024 * 1024,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-oom-%s", resourceID)
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatal("expected OOM alert for container with exit code 137")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Fatalf("expected critical OOM alert, got %s", alert.Level)
|
|
}
|
|
if alert.Type != "docker-container-oom-kill" {
|
|
t.Fatalf("expected alert type docker-container-oom-kill, got %s", alert.Type)
|
|
}
|
|
})
|
|
|
|
t.Run("dead container with exit code 137 - critical OOM alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
host := models.DockerHost{
|
|
ID: "host-oom-dead",
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: "container-dead",
|
|
Name: "dead-oom-app",
|
|
State: "dead",
|
|
Status: "Dead",
|
|
ExitCode: 137,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
|
|
alertID := fmt.Sprintf("docker-container-oom-%s", resourceID)
|
|
alert, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatal("expected OOM alert for dead container with exit code 137")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Fatalf("expected critical OOM alert, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("repeated 137 exit code - no new alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
hostID := "host-oom-4"
|
|
containerID := "container-4"
|
|
|
|
host := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "oom-killed-app",
|
|
State: "exited",
|
|
Status: "Exited (137) 1 minute ago",
|
|
ExitCode: 137,
|
|
},
|
|
},
|
|
}
|
|
|
|
// First check - should create alert
|
|
m.CheckDockerHost(host)
|
|
|
|
resourceID := dockerResourceID(hostID, containerID)
|
|
alertID := fmt.Sprintf("docker-container-oom-%s", resourceID)
|
|
alert1, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatal("expected OOM alert on first check")
|
|
}
|
|
startTime := alert1.StartTime
|
|
|
|
// Second check with same exit code - should not create new alert
|
|
m.CheckDockerHost(host)
|
|
|
|
alert2, exists := m.activeAlerts[alertID]
|
|
if !exists {
|
|
t.Fatal("expected OOM alert to still exist on second check")
|
|
}
|
|
if alert2.StartTime != startTime {
|
|
t.Fatal("expected alert start time to be preserved (not a new alert)")
|
|
}
|
|
})
|
|
|
|
t.Run("container recovers - alert cleared", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
hostID := "host-oom-5"
|
|
containerID := "container-5"
|
|
|
|
// First check with OOM killed container
|
|
hostOOM := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "recovering-app",
|
|
State: "exited",
|
|
Status: "Exited (137) 1 minute ago",
|
|
ExitCode: 137,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(hostOOM)
|
|
|
|
resourceID := dockerResourceID(hostID, containerID)
|
|
alertID := fmt.Sprintf("docker-container-oom-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; !exists {
|
|
t.Fatal("expected OOM alert to be raised")
|
|
}
|
|
|
|
// Container is restarted and running again
|
|
hostRunning := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "recovering-app",
|
|
State: "running",
|
|
Status: "Up 30 seconds",
|
|
ExitCode: 0,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(hostRunning)
|
|
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected OOM alert to be cleared when container started running")
|
|
}
|
|
})
|
|
|
|
t.Run("container exits with different code - alert cleared", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
hostID := "host-oom-6"
|
|
containerID := "container-6"
|
|
|
|
// First check with OOM killed container
|
|
hostOOM := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "multi-exit-app",
|
|
State: "exited",
|
|
Status: "Exited (137) 1 minute ago",
|
|
ExitCode: 137,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(hostOOM)
|
|
|
|
resourceID := dockerResourceID(hostID, containerID)
|
|
alertID := fmt.Sprintf("docker-container-oom-%s", resourceID)
|
|
if _, exists := m.activeAlerts[alertID]; !exists {
|
|
t.Fatal("expected OOM alert to be raised")
|
|
}
|
|
|
|
// Container exits with different exit code (normal error)
|
|
hostNormalExit := models.DockerHost{
|
|
ID: hostID,
|
|
DisplayName: "Docker Host",
|
|
Hostname: "docker.local",
|
|
Containers: []models.DockerContainer{
|
|
{
|
|
ID: containerID,
|
|
Name: "multi-exit-app",
|
|
State: "exited",
|
|
Status: "Exited (1) 30 seconds ago",
|
|
ExitCode: 1,
|
|
},
|
|
},
|
|
}
|
|
|
|
m.CheckDockerHost(hostNormalExit)
|
|
|
|
if _, exists := m.activeAlerts[alertID]; exists {
|
|
t.Fatal("expected OOM alert to be cleared when container exited with different code")
|
|
}
|
|
})
|
|
}
|
|
|
|
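// TestDockerContainerRestartLoopAlert covers the restart-loop detection path of
// CheckDockerHost: the first check only initializes tracking, restarts at or below
// the configured threshold stay quiet, exceeding the threshold raises a critical
// alert, and the alert clears once tracked restarts age out of the window.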
func TestDockerContainerRestartLoopAlert(t *testing.T) {
	t.Run("first check - no alert", func(t *testing.T) {
		m := newTestManager(t)

		host := models.DockerHost{
			ID: "host-restart-1",
			DisplayName: "Docker Host",
			Hostname: "docker.local",
			Containers: []models.DockerContainer{
				{
					ID: "container-1",
					Name: "first-check-app",
					State: "running",
					Status: "Up 10 minutes",
					RestartCount: 5, // Even with high restart count, first check just initializes
				},
			},
		}

		m.CheckDockerHost(host)

		resourceID := dockerResourceID(host.ID, host.Containers[0].ID)
		alertID := fmt.Sprintf("docker-container-restart-loop-%s", resourceID)
		if _, exists := m.activeAlerts[alertID]; exists {
			t.Fatal("expected no restart loop alert on first check (just initializes tracking)")
		}

		// Verify tracking was initialized
		m.mu.Lock()
		record, exists := m.dockerRestartTracking[resourceID]
		m.mu.Unlock()
		if !exists {
			t.Fatal("expected tracking record to be initialized")
		}
		if record.lastCount != 5 {
			t.Fatalf("expected lastCount=5, got %d", record.lastCount)
		}
	})

	t.Run("stable restart count - no alert", func(t *testing.T) {
		m := newTestManager(t)

		hostID := "host-restart-2"
		containerID := "container-2"

		host := models.DockerHost{
			ID: hostID,
			DisplayName: "Docker Host",
			Hostname: "docker.local",
			Containers: []models.DockerContainer{
				{
					ID: containerID,
					Name: "stable-app",
					State: "running",
					Status: "Up 10 minutes",
					RestartCount: 2,
				},
			},
		}

		// First check - initializes tracking
		m.CheckDockerHost(host)

		// Second check - same restart count
		m.CheckDockerHost(host)

		// Third check - still same restart count
		m.CheckDockerHost(host)

		resourceID := dockerResourceID(hostID, containerID)
		alertID := fmt.Sprintf("docker-container-restart-loop-%s", resourceID)
		if _, exists := m.activeAlerts[alertID]; exists {
			t.Fatal("expected no restart loop alert for stable container")
		}
	})

	t.Run("restarts under threshold - no alert", func(t *testing.T) {
		m := newTestManager(t)
		// Configure threshold to 3 (default)
		m.config.DockerDefaults.RestartCount = 3
		m.config.DockerDefaults.RestartWindow = 300

		hostID := "host-restart-3"
		containerID := "container-3"

		// First check - initializes with RestartCount=0
		host := models.DockerHost{
			ID: hostID,
			DisplayName: "Docker Host",
			Hostname: "docker.local",
			Containers: []models.DockerContainer{
				{
					ID: containerID,
					Name: "under-threshold-app",
					State: "running",
					Status: "Up 10 minutes",
					RestartCount: 0,
				},
			},
		}
		m.CheckDockerHost(host)

		// Container restarts twice (under threshold of 3)
		host.Containers[0].RestartCount = 2
		m.CheckDockerHost(host)

		// One more restart (now at 3, threshold is >3 so still no alert)
		host.Containers[0].RestartCount = 3
		m.CheckDockerHost(host)

		resourceID := dockerResourceID(hostID, containerID)
		alertID := fmt.Sprintf("docker-container-restart-loop-%s", resourceID)
		if _, exists := m.activeAlerts[alertID]; exists {
			t.Fatal("expected no restart loop alert when restarts <= threshold")
		}

		// Verify we tracked 3 restarts
		m.mu.Lock()
		record := m.dockerRestartTracking[resourceID]
		recentCount := len(record.times)
		m.mu.Unlock()
		if recentCount != 3 {
			t.Fatalf("expected 3 tracked restarts, got %d", recentCount)
		}
	})

	t.Run("hits restart loop threshold - alert raised", func(t *testing.T) {
		m := newTestManager(t)
		// Configure threshold to 3 (alert when >3)
		m.config.DockerDefaults.RestartCount = 3
		m.config.DockerDefaults.RestartWindow = 300

		hostID := "host-restart-4"
		containerID := "container-4"

		// First check - initializes with RestartCount=0
		host := models.DockerHost{
			ID: hostID,
			DisplayName: "Docker Host",
			Hostname: "docker.local",
			Containers: []models.DockerContainer{
				{
					ID: containerID,
					Name: "restart-loop-app",
					State: "running",
					Status: "Up 1 minute",
					RestartCount: 0,
				},
			},
		}
		m.CheckDockerHost(host)

		// Container restarts 4 times (exceeds threshold of 3)
		host.Containers[0].RestartCount = 4
		m.CheckDockerHost(host)

		resourceID := dockerResourceID(hostID, containerID)
		alertID := fmt.Sprintf("docker-container-restart-loop-%s", resourceID)
		alert, exists := m.activeAlerts[alertID]
		if !exists {
			t.Fatal("expected restart loop alert when restarts > threshold")
		}
		if alert.Level != AlertLevelCritical {
			t.Fatalf("expected critical alert, got %s", alert.Level)
		}
		if alert.Type != "docker-container-restart-loop" {
			t.Fatalf("expected alert type docker-container-restart-loop, got %s", alert.Type)
		}

		// Verify metadata
		if alert.Metadata["restartCount"] != 4 {
			t.Fatalf("expected restartCount=4 in metadata, got %v", alert.Metadata["restartCount"])
		}
		if alert.Metadata["recentRestarts"] != 4 {
			t.Fatalf("expected recentRestarts=4 in metadata, got %v", alert.Metadata["recentRestarts"])
		}
	})

	t.Run("restart loop recovery - alert cleared", func(t *testing.T) {
		m := newTestManager(t)
		// Configure short window for testing
		m.config.DockerDefaults.RestartCount = 3
		m.config.DockerDefaults.RestartWindow = 1 // 1 second window for testing

		hostID := "host-restart-5"
		containerID := "container-5"
		resourceID := dockerResourceID(hostID, containerID)

		// Manually set up a restart loop state
		m.mu.Lock()
		now := time.Now()
		m.dockerRestartTracking[resourceID] = &dockerRestartRecord{
			count: 5,
			lastCount: 5,
			times: []time.Time{now, now, now, now}, // 4 recent restarts
			lastChecked: now,
		}
		m.mu.Unlock()

		// Create initial alert
		host := models.DockerHost{
			ID: hostID,
			DisplayName: "Docker Host",
			Hostname: "docker.local",
			Containers: []models.DockerContainer{
				{
					ID: containerID,
					Name: "recovering-app",
					State: "running",
					Status: "Up 1 minute",
					RestartCount: 5,
				},
			},
		}
		m.CheckDockerHost(host)

		alertID := fmt.Sprintf("docker-container-restart-loop-%s", resourceID)
		if _, exists := m.activeAlerts[alertID]; !exists {
			t.Fatal("expected restart loop alert to be raised initially")
		}

		// Wait for time window to pass
		time.Sleep(1100 * time.Millisecond)

		// Check again with same restart count - old restarts should be cleaned up
		m.CheckDockerHost(host)

		if _, exists := m.activeAlerts[alertID]; exists {
			t.Fatal("expected restart loop alert to be cleared after window passes")
		}
	})

	t.Run("incremental restarts trigger alert", func(t *testing.T) {
		m := newTestManager(t)
		m.config.DockerDefaults.RestartCount = 2
		m.config.DockerDefaults.RestartWindow = 300

		hostID := "host-restart-6"
		containerID := "container-6"

		host := models.DockerHost{
			ID: hostID,
			DisplayName: "Docker Host",
			Hostname: "docker.local",
			Containers: []models.DockerContainer{
				{
					ID: containerID,
					Name: "incremental-restart-app",
					State: "running",
					Status: "Up 1 minute",
					RestartCount: 0,
				},
			},
		}

		// First check - initializes
		m.CheckDockerHost(host)

		resourceID := dockerResourceID(hostID, containerID)
		alertID := fmt.Sprintf("docker-container-restart-loop-%s", resourceID)

		// Restart 1
		host.Containers[0].RestartCount = 1
		m.CheckDockerHost(host)
		if _, exists := m.activeAlerts[alertID]; exists {
			t.Fatal("expected no alert after 1 restart")
		}

		// Restart 2
		host.Containers[0].RestartCount = 2
		m.CheckDockerHost(host)
		if _, exists := m.activeAlerts[alertID]; exists {
			t.Fatal("expected no alert after 2 restarts (threshold is >2)")
		}

		// Restart 3 - exceeds threshold
		host.Containers[0].RestartCount = 3
		m.CheckDockerHost(host)
		if _, exists := m.activeAlerts[alertID]; !exists {
			t.Fatal("expected alert after 3 restarts (>2 threshold)")
		}
	})

	t.Run("alert preserves start time on updates", func(t *testing.T) {
		m := newTestManager(t)
		m.config.DockerDefaults.RestartCount = 2
		m.config.DockerDefaults.RestartWindow = 300

		hostID := "host-restart-7"
		containerID := "container-7"

		host := models.DockerHost{
			ID: hostID,
			DisplayName: "Docker Host",
			Hostname: "docker.local",
			Containers: []models.DockerContainer{
				{
					ID: containerID,
					Name: "preserve-time-app",
					State: "running",
					Status: "Up 1 minute",
					RestartCount: 0,
				},
			},
		}

		// Initialize and trigger alert
		m.CheckDockerHost(host)
		host.Containers[0].RestartCount = 5
		m.CheckDockerHost(host)

		resourceID := dockerResourceID(hostID, containerID)
		alertID := fmt.Sprintf("docker-container-restart-loop-%s", resourceID)
		alert1, exists := m.activeAlerts[alertID]
		if !exists {
			t.Fatal("expected alert to be raised")
		}
		startTime1 := alert1.StartTime

		// More restarts - alert should update but preserve start time
		time.Sleep(10 * time.Millisecond)
		host.Containers[0].RestartCount = 7
		m.CheckDockerHost(host)

		alert2, exists := m.activeAlerts[alertID]
		if !exists {
			t.Fatal("expected alert to still exist")
		}
		if !alert2.StartTime.Equal(startTime1) {
			t.Fatalf("expected start time to be preserved, got %v vs %v", alert2.StartTime, startTime1)
		}
	})
}

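// TestApplyThresholdOverride exercises how per-resource overrides are merged onto
// base thresholds, including legacy single-value conversion, hysteresis defaults,
// and Note handling (set, trim, clear).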
func TestApplyThresholdOverride(t *testing.T) {
	t.Run("empty override returns base unchanged", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
			Memory: &HysteresisThreshold{Trigger: 90, Clear: 85},
		}
		override := ThresholdConfig{}
		result := m.applyThresholdOverride(base, override)

		if result.CPU == nil || result.CPU.Trigger != 80 || result.CPU.Clear != 75 {
			t.Errorf("expected CPU to match base, got %+v", result.CPU)
		}
		if result.Memory == nil || result.Memory.Trigger != 90 || result.Memory.Clear != 85 {
			t.Errorf("expected Memory to match base, got %+v", result.Memory)
		}
		if result.Disabled {
			t.Error("expected Disabled to remain false")
		}
	})

	t.Run("Disabled flag override", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{Disabled: false}
		override := ThresholdConfig{Disabled: true}
		result := m.applyThresholdOverride(base, override)

		if !result.Disabled {
			t.Error("expected Disabled to be true after override")
		}
	})

	t.Run("DisableConnectivity override", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{DisableConnectivity: false}
		override := ThresholdConfig{DisableConnectivity: true}
		result := m.applyThresholdOverride(base, override)

		if !result.DisableConnectivity {
			t.Error("expected DisableConnectivity to be true after override")
		}
	})

	t.Run("CPU threshold override", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
		}
		override := ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 95, Clear: 90},
		}
		result := m.applyThresholdOverride(base, override)

		if result.CPU == nil {
			t.Fatal("expected CPU to be set")
		}
		if result.CPU.Trigger != 95 || result.CPU.Clear != 90 {
			t.Errorf("expected CPU override values, got Trigger=%v Clear=%v", result.CPU.Trigger, result.CPU.Clear)
		}
	})

	t.Run("legacy CPU threshold conversion", func(t *testing.T) {
		m := newTestManager(t)
		m.config.HysteresisMargin = 5.0
		base := ThresholdConfig{}
		legacyVal := 85.0
		override := ThresholdConfig{
			CPULegacy: &legacyVal,
		}
		result := m.applyThresholdOverride(base, override)

		if result.CPU == nil {
			t.Fatal("expected CPU to be converted from legacy")
		}
		if result.CPU.Trigger != 85.0 {
			t.Errorf("expected Trigger=85, got %v", result.CPU.Trigger)
		}
		if result.CPU.Clear != 80.0 {
			t.Errorf("expected Clear=80 (85-5 margin), got %v", result.CPU.Clear)
		}
	})

	t.Run("modern CPU takes precedence over legacy", func(t *testing.T) {
		m := newTestManager(t)
		legacyVal := 70.0
		base := ThresholdConfig{}
		override := ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 95, Clear: 90},
			CPULegacy: &legacyVal,
		}
		result := m.applyThresholdOverride(base, override)

		if result.CPU.Trigger != 95 {
			t.Errorf("expected modern CPU to take precedence, got Trigger=%v", result.CPU.Trigger)
		}
	})

	t.Run("multiple metrics override", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 75},
			Memory: &HysteresisThreshold{Trigger: 80, Clear: 75},
			Disk: &HysteresisThreshold{Trigger: 80, Clear: 75},
		}
		override := ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 90, Clear: 85},
			Memory: &HysteresisThreshold{Trigger: 95, Clear: 90},
			NetworkIn: &HysteresisThreshold{Trigger: 100, Clear: 95},
			NetworkOut: &HysteresisThreshold{Trigger: 200, Clear: 190},
		}
		result := m.applyThresholdOverride(base, override)

		if result.CPU.Trigger != 90 {
			t.Errorf("expected CPU override, got %v", result.CPU.Trigger)
		}
		if result.Memory.Trigger != 95 {
			t.Errorf("expected Memory override, got %v", result.Memory.Trigger)
		}
		// Disk should remain unchanged (not in override)
		if result.Disk.Trigger != 80 {
			t.Errorf("expected Disk unchanged, got %v", result.Disk.Trigger)
		}
		if result.NetworkIn == nil || result.NetworkIn.Trigger != 100 {
			t.Errorf("expected NetworkIn to be added, got %+v", result.NetworkIn)
		}
		if result.NetworkOut == nil || result.NetworkOut.Trigger != 200 {
			t.Errorf("expected NetworkOut to be added, got %+v", result.NetworkOut)
		}
	})

	t.Run("Note override", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{}
		note := "test note"
		override := ThresholdConfig{Note: &note}
		result := m.applyThresholdOverride(base, override)

		if result.Note == nil || *result.Note != "test note" {
			t.Errorf("expected Note to be set, got %v", result.Note)
		}
	})

	t.Run("Note cleared when empty string", func(t *testing.T) {
		m := newTestManager(t)
		existingNote := "existing note"
		base := ThresholdConfig{Note: &existingNote}
		emptyNote := ""
		override := ThresholdConfig{Note: &emptyNote}
		result := m.applyThresholdOverride(base, override)

		if result.Note != nil {
			t.Errorf("expected Note to be nil when empty string override, got %v", *result.Note)
		}
	})

	t.Run("Note trimmed of whitespace", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{}
		note := " trimmed note "
		override := ThresholdConfig{Note: &note}
		result := m.applyThresholdOverride(base, override)

		if result.Note == nil || *result.Note != "trimmed note" {
			t.Errorf("expected Note to be trimmed, got %v", result.Note)
		}
	})

	t.Run("whitespace-only Note becomes nil", func(t *testing.T) {
		m := newTestManager(t)
		existingNote := "existing"
		base := ThresholdConfig{Note: &existingNote}
		whitespaceNote := " "
		override := ThresholdConfig{Note: &whitespaceNote}
		result := m.applyThresholdOverride(base, override)

		if result.Note != nil {
			t.Errorf("expected whitespace-only Note to become nil, got %v", *result.Note)
		}
	})

	t.Run("all metric types with legacy conversion", func(t *testing.T) {
		m := newTestManager(t)
		m.config.HysteresisMargin = 5.0
		base := ThresholdConfig{}

		val80 := 80.0
		val90 := 90.0
		val100 := 100.0
		val200 := 200.0
		override := ThresholdConfig{
			MemoryLegacy: &val80,
			DiskLegacy: &val90,
			DiskReadLegacy: &val100,
			DiskWriteLegacy: &val100,
			NetworkInLegacy: &val200,
			NetworkOutLegacy: &val200,
		}
		result := m.applyThresholdOverride(base, override)

		if result.Memory == nil || result.Memory.Trigger != 80 {
			t.Errorf("expected Memory converted, got %+v", result.Memory)
		}
		if result.Disk == nil || result.Disk.Trigger != 90 {
			t.Errorf("expected Disk converted, got %+v", result.Disk)
		}
		if result.DiskRead == nil || result.DiskRead.Trigger != 100 {
			t.Errorf("expected DiskRead converted, got %+v", result.DiskRead)
		}
		if result.DiskWrite == nil || result.DiskWrite.Trigger != 100 {
			t.Errorf("expected DiskWrite converted, got %+v", result.DiskWrite)
		}
		if result.NetworkIn == nil || result.NetworkIn.Trigger != 200 {
			t.Errorf("expected NetworkIn converted, got %+v", result.NetworkIn)
		}
		if result.NetworkOut == nil || result.NetworkOut.Trigger != 200 {
			t.Errorf("expected NetworkOut converted, got %+v", result.NetworkOut)
		}
	})

	t.Run("Temperature and Usage override", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{}
		override := ThresholdConfig{
			Temperature: &HysteresisThreshold{Trigger: 85, Clear: 80},
			Usage: &HysteresisThreshold{Trigger: 90, Clear: 85},
		}
		result := m.applyThresholdOverride(base, override)

		if result.Temperature == nil || result.Temperature.Trigger != 85 {
			t.Errorf("expected Temperature override, got %+v", result.Temperature)
		}
		if result.Usage == nil || result.Usage.Trigger != 90 {
			t.Errorf("expected Usage override, got %+v", result.Usage)
		}
	})

	t.Run("ensureHysteresisThreshold fills missing Clear", func(t *testing.T) {
		m := newTestManager(t)
		base := ThresholdConfig{}
		override := ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80, Clear: 0}, // Clear not set
		}
		result := m.applyThresholdOverride(base, override)

		if result.CPU == nil {
			t.Fatal("expected CPU to be set")
		}
		// ensureHysteresisThreshold sets Clear to Trigger - 5 when Clear <= 0
		if result.CPU.Clear != 75 {
			t.Errorf("expected Clear to be 75 (80-5 default), got %v", result.CPU.Clear)
		}
	})
}

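// TestSuppressGuestAlerts verifies that suppressing a guest clears its active
// alerts (matched by exact ResourceID and by "guest/..." prefix) plus all
// auxiliary tracking maps, while leaving other guests' alerts untouched.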
func TestSuppressGuestAlerts(t *testing.T) {
	t.Run("no alerts for guest returns false", func(t *testing.T) {
		m := newTestManager(t)

		result := m.suppressGuestAlerts("vm100")
		if result {
			t.Error("expected false when no alerts exist for guest")
		}
	})

	t.Run("active alert with exact ResourceID match clears and returns true", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
		}
		m.mu.Unlock()

		result := m.suppressGuestAlerts("vm100")
		if !result {
			t.Error("expected true when active alert was cleared")
		}

		m.mu.RLock()
		if _, exists := m.activeAlerts["vm100-cpu"]; exists {
			t.Error("expected alert to be cleared from activeAlerts")
		}
		m.mu.RUnlock()
	})

	t.Run("active alert with prefix match clears", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100/disk1-disk"] = &Alert{
			ID: "vm100/disk1-disk",
			ResourceID: "vm100/disk1",
			Type: "disk",
		}
		m.mu.Unlock()

		result := m.suppressGuestAlerts("vm100")
		if !result {
			t.Error("expected true when active alert was cleared")
		}

		m.mu.RLock()
		if _, exists := m.activeAlerts["vm100/disk1-disk"]; exists {
			t.Error("expected alert with prefix match to be cleared")
		}
		m.mu.RUnlock()
	})

	t.Run("clears from all auxiliary maps", func(t *testing.T) {
		m := newTestManager(t)

		now := time.Now()
		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
		}
		m.pendingAlerts["vm100-memory"] = now
		m.recentAlerts["vm100-disk"] = &Alert{ID: "vm100-disk", ResourceID: "vm100"}
		m.suppressedUntil["vm100-network"] = now.Add(time.Hour)
		m.alertRateLimit["vm100-io"] = []time.Time{now}
		m.offlineConfirmations["vm100"] = 1
		m.mu.Unlock()

		result := m.suppressGuestAlerts("vm100")
		if !result {
			t.Error("expected true when active alert was cleared")
		}

		m.mu.RLock()
		defer m.mu.RUnlock()

		if _, exists := m.activeAlerts["vm100-cpu"]; exists {
			t.Error("expected activeAlerts to be cleared")
		}
		if _, exists := m.pendingAlerts["vm100-memory"]; exists {
			t.Error("expected pendingAlerts to be cleared")
		}
		if _, exists := m.recentAlerts["vm100-disk"]; exists {
			t.Error("expected recentAlerts to be cleared")
		}
		if _, exists := m.suppressedUntil["vm100-network"]; exists {
			t.Error("expected suppressedUntil to be cleared")
		}
		if _, exists := m.alertRateLimit["vm100-io"]; exists {
			t.Error("expected alertRateLimit to be cleared")
		}
		if _, exists := m.offlineConfirmations["vm100"]; exists {
			t.Error("expected offlineConfirmations to be cleared")
		}
	})

	t.Run("multiple alerts cleared", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
		}
		m.activeAlerts["vm100-memory"] = &Alert{
			ID: "vm100-memory",
			ResourceID: "vm100",
			Type: "memory",
		}
		m.activeAlerts["vm100/disk0-disk"] = &Alert{
			ID: "vm100/disk0-disk",
			ResourceID: "vm100/disk0",
			Type: "disk",
		}
		// Also add an alert for a different guest that should NOT be cleared
		m.activeAlerts["vm200-cpu"] = &Alert{
			ID: "vm200-cpu",
			ResourceID: "vm200",
			Type: "cpu",
		}
		m.mu.Unlock()

		result := m.suppressGuestAlerts("vm100")
		if !result {
			t.Error("expected true when alerts were cleared")
		}

		m.mu.RLock()
		defer m.mu.RUnlock()

		if _, exists := m.activeAlerts["vm100-cpu"]; exists {
			t.Error("expected vm100-cpu to be cleared")
		}
		if _, exists := m.activeAlerts["vm100-memory"]; exists {
			t.Error("expected vm100-memory to be cleared")
		}
		if _, exists := m.activeAlerts["vm100/disk0-disk"]; exists {
			t.Error("expected vm100/disk0-disk to be cleared")
		}
		if _, exists := m.activeAlerts["vm200-cpu"]; !exists {
			t.Error("expected vm200-cpu to NOT be cleared")
		}
	})

	t.Run("clears auxiliary maps even without active alerts", func(t *testing.T) {
		m := newTestManager(t)

		now := time.Now()
		m.mu.Lock()
		// No active alerts, but has entries in auxiliary maps
		m.pendingAlerts["vm100-memory"] = now
		m.recentAlerts["vm100-disk"] = &Alert{ID: "vm100-disk", ResourceID: "vm100"}
		m.suppressedUntil["vm100-network"] = now.Add(time.Hour)
		m.alertRateLimit["vm100-io"] = []time.Time{now}
		m.offlineConfirmations["vm100"] = 1
		m.mu.Unlock()

		result := m.suppressGuestAlerts("vm100")
		// Returns false because no active alerts were cleared
		if result {
			t.Error("expected false when no active alerts were cleared")
		}

		m.mu.RLock()
		defer m.mu.RUnlock()

		// But auxiliary maps should still be cleared
		if _, exists := m.pendingAlerts["vm100-memory"]; exists {
			t.Error("expected pendingAlerts to be cleared")
		}
		if _, exists := m.recentAlerts["vm100-disk"]; exists {
			t.Error("expected recentAlerts to be cleared")
		}
		if _, exists := m.suppressedUntil["vm100-network"]; exists {
			t.Error("expected suppressedUntil to be cleared")
		}
		if _, exists := m.alertRateLimit["vm100-io"]; exists {
			t.Error("expected alertRateLimit to be cleared")
		}
		if _, exists := m.offlineConfirmations["vm100"]; exists {
			t.Error("expected offlineConfirmations to be cleared")
		}
	})
}

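// TestGuestHasMonitorOnlyAlerts checks detection of the monitorOnly metadata flag
// on a guest's active alerts, in both bool and string form.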
func TestGuestHasMonitorOnlyAlerts(t *testing.T) {
	t.Run("no alerts returns false", func(t *testing.T) {
		m := newTestManager(t)

		result := m.guestHasMonitorOnlyAlerts("vm100")
		if result {
			t.Error("expected false when no alerts exist")
		}
	})

	t.Run("has non-monitor-only alert returns false", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
			Metadata: nil, // No metadata means not monitor-only
		}
		m.mu.Unlock()

		result := m.guestHasMonitorOnlyAlerts("vm100")
		if result {
			t.Error("expected false when alert is not monitor-only")
		}
	})

	t.Run("has monitor-only alert with bool metadata returns true", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
			Metadata: map[string]interface{}{
				"monitorOnly": true,
			},
		}
		m.mu.Unlock()

		result := m.guestHasMonitorOnlyAlerts("vm100")
		if !result {
			t.Error("expected true when monitor-only alert exists")
		}
	})

	t.Run("has monitor-only alert with string metadata returns true", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
			Metadata: map[string]interface{}{
				"monitorOnly": "true",
			},
		}
		m.mu.Unlock()

		result := m.guestHasMonitorOnlyAlerts("vm100")
		if !result {
			t.Error("expected true when monitor-only alert exists (string metadata)")
		}
	})

	t.Run("alert for different guest not matched", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm200-cpu"] = &Alert{
			ID: "vm200-cpu",
			ResourceID: "vm200",
			Type: "cpu",
			Metadata: map[string]interface{}{
				"monitorOnly": true,
			},
		}
		m.mu.Unlock()

		result := m.guestHasMonitorOnlyAlerts("vm100")
		if result {
			t.Error("expected false when monitor-only alert is for different guest")
		}
	})

	t.Run("monitorOnly false returns false", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
			Metadata: map[string]interface{}{
				"monitorOnly": false,
			},
		}
		m.mu.Unlock()

		result := m.guestHasMonitorOnlyAlerts("vm100")
		if result {
			t.Error("expected false when monitorOnly is explicitly false")
		}
	})
}

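// TestCheckNode covers node-level checks: global disable flags, offline and
// connection-health handling, and CPU, memory, disk, and temperature threshold
// evaluation, including per-node overrides.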
func TestCheckNode(t *testing.T) {
	// t.Parallel()

	t.Run("returns early when alerts disabled", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.Enabled = false
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			CPU: 0.95, // Would trigger alert if enabled
			Status: "online",
		}

		m.CheckNode(node)

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts when disabled, got %d", alertCount)
		}
	})

	t.Run("DisableAllNodes clears existing alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Pre-create alerts that should be cleared
		m.mu.Lock()
		m.activeAlerts["node1-cpu"] = &Alert{ID: "node1-cpu", ResourceID: "node1", Type: "cpu"}
		m.activeAlerts["node1-memory"] = &Alert{ID: "node1-memory", ResourceID: "node1", Type: "memory"}
		m.activeAlerts["node1-disk"] = &Alert{ID: "node1-disk", ResourceID: "node1", Type: "disk"}
		m.activeAlerts["node1-temperature"] = &Alert{ID: "node1-temperature", ResourceID: "node1", Type: "temperature"}
		m.activeAlerts["node-offline-node1"] = &Alert{ID: "node-offline-node1", ResourceID: "node1", Type: "connectivity"}
		m.nodeOfflineCount["node1"] = 5
		m.config.DisableAllNodes = true
		m.mu.Unlock()

		node := models.Node{ID: "node1", Name: "Node 1", Status: "online"}
		m.CheckNode(node)

		m.mu.RLock()
		_, cpuExists := m.activeAlerts["node1-cpu"]
		_, memExists := m.activeAlerts["node1-memory"]
		_, diskExists := m.activeAlerts["node1-disk"]
		_, tempExists := m.activeAlerts["node1-temperature"]
		_, offlineExists := m.activeAlerts["node-offline-node1"]
		_, countExists := m.nodeOfflineCount["node1"]
		m.mu.RUnlock()

		if cpuExists {
			t.Error("expected cpu alert to be cleared")
		}
		if memExists {
			t.Error("expected memory alert to be cleared")
		}
		if diskExists {
			t.Error("expected disk alert to be cleared")
		}
		if tempExists {
			t.Error("expected temperature alert to be cleared")
		}
		if offlineExists {
			t.Error("expected offline alert to be cleared")
		}
		if countExists {
			t.Error("expected offline count to be cleared")
		}
	})

	t.Run("DisableNodesOffline clears tracking and offline alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Pre-create offline alert and tracking
		m.mu.Lock()
		m.activeAlerts["node-offline-node1"] = &Alert{ID: "node-offline-node1", ResourceID: "node1", Type: "connectivity"}
		m.nodeOfflineCount["node1"] = 3
		m.config.DisableAllNodesOffline = true
		m.mu.Unlock()

		node := models.Node{ID: "node1", Name: "Node 1", Status: "offline"}
		m.CheckNode(node)

		m.mu.RLock()
		_, alertExists := m.activeAlerts["node-offline-node1"]
		_, countExists := m.nodeOfflineCount["node1"]
		m.mu.RUnlock()

		if alertExists {
			t.Error("expected offline alert to be cleared")
		}
		if countExists {
			t.Error("expected offline count to be cleared")
		}
	})

	t.Run("offline node triggers offline check", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Pre-set count to trigger alert on this call
		m.mu.Lock()
		m.nodeOfflineCount["node1"] = 2
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "offline",
		}
		m.CheckNode(node)

		m.mu.RLock()
		alert := m.activeAlerts["node-offline-node1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert to be created")
		}
		if alert.Type != "connectivity" {
			t.Errorf("expected type connectivity, got %s", alert.Type)
		}
	})

	t.Run("node with connection error triggers offline check", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.nodeOfflineCount["node1"] = 2
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			ConnectionHealth: "error",
		}
		m.CheckNode(node)

		m.mu.RLock()
		alert := m.activeAlerts["node-offline-node1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert for connection error")
		}
	})

	t.Run("node with connection failed triggers offline check", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.nodeOfflineCount["node1"] = 2
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			ConnectionHealth: "failed",
		}
		m.CheckNode(node)

		m.mu.RLock()
		alert := m.activeAlerts["node-offline-node1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert for connection failed")
		}
	})

	t.Run("online node clears offline alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Pre-create offline alert
		m.mu.Lock()
		m.activeAlerts["node-offline-node1"] = &Alert{
			ID: "node-offline-node1",
			ResourceID: "node1",
			Type: "connectivity",
		}
		m.nodeOfflineCount["node1"] = 5
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			ConnectionHealth: "connected",
		}
		m.CheckNode(node)

		m.mu.RLock()
		_, alertExists := m.activeAlerts["node-offline-node1"]
		_, countExists := m.nodeOfflineCount["node1"]
		m.mu.RUnlock()

		if alertExists {
			t.Error("expected offline alert to be cleared")
		}
		if countExists {
			t.Error("expected offline count to be cleared")
		}
	})

	t.Run("online node triggers metric checks", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Set thresholds that will trigger and disable time threshold
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			CPU: 0.95, // 95% - above trigger
		}
		m.CheckNode(node)

		m.mu.RLock()
		alert := m.activeAlerts["node1-cpu"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected cpu alert to be created")
		}
		if alert.Type != "cpu" {
			t.Errorf("expected type cpu, got %s", alert.Type)
		}
	})

	t.Run("offline node skips metric checks", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.NodeDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Status: "offline",
			CPU: 0.95, // Would trigger if checked
		}
		m.CheckNode(node)

		m.mu.RLock()
		_, cpuExists := m.activeAlerts["node1-cpu"]
		m.mu.RUnlock()

		if cpuExists {
			t.Error("expected no cpu alert for offline node")
		}
	})

	t.Run("applies override thresholds", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.NodeDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.config.Overrides = map[string]ThresholdConfig{
			"node1": {
				CPU: &HysteresisThreshold{Trigger: 99.0, Clear: 90.0}, // Higher threshold
			},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			CPU: 0.95, // 95% - below override trigger of 99%
		}
		m.CheckNode(node)

		m.mu.RLock()
		_, cpuExists := m.activeAlerts["node1-cpu"]
		m.mu.RUnlock()

		if cpuExists {
			t.Error("expected no alert due to higher override threshold")
		}
	})

	t.Run("checks temperature with package temp", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			Temperature: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			Temperature: &models.Temperature{
				Available: true,
				CPUPackage: 90.0, // Above trigger
				CPUMax: 85.0,
			},
		}
		m.CheckNode(node)

		m.mu.RLock()
		alert := m.activeAlerts["node1-temperature"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected temperature alert")
		}
	})

	t.Run("checks temperature with max temp fallback", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			Temperature: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			Temperature: &models.Temperature{
				Available: true,
				CPUPackage: 0, // Zero - will use max
				CPUMax: 90.0, // Above trigger
			},
		}
		m.CheckNode(node)

		m.mu.RLock()
		alert := m.activeAlerts["node1-temperature"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected temperature alert using max temp fallback")
		}
	})

	t.Run("skips temperature when not available", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.NodeDefaults = ThresholdConfig{
			Temperature: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			Temperature: &models.Temperature{
				Available: false, // Not available
				CPUPackage: 90.0,
			},
		}
		m.CheckNode(node)

		m.mu.RLock()
		_, tempExists := m.activeAlerts["node1-temperature"]
		m.mu.RUnlock()

		if tempExists {
			t.Error("expected no temperature alert when not available")
		}
	})

	t.Run("skips temperature when nil", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.NodeDefaults = ThresholdConfig{
			Temperature: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			Temperature: nil, // Nil temperature
		}
		m.CheckNode(node)

		m.mu.RLock()
		_, tempExists := m.activeAlerts["node1-temperature"]
		m.mu.RUnlock()

		if tempExists {
			t.Error("expected no temperature alert when temp is nil")
		}
	})

	t.Run("skips temperature when threshold nil", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// No temperature threshold set
		m.mu.Lock()
		m.config.NodeDefaults = ThresholdConfig{
			Temperature: nil,
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			Temperature: &models.Temperature{
				Available: true,
				CPUPackage: 90.0,
			},
		}
		m.CheckNode(node)

		m.mu.RLock()
		_, tempExists := m.activeAlerts["node1-temperature"]
		m.mu.RUnlock()

		if tempExists {
			t.Error("expected no temperature alert when threshold nil")
		}
	})

	t.Run("checks memory metric", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			Memory: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			Memory: models.Memory{
				Usage: 95.0, // Above trigger
			},
		}
		m.CheckNode(node)

		m.mu.RLock()
		alert := m.activeAlerts["node1-memory"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected memory alert")
		}
	})

	t.Run("checks disk metric", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			Disk: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		node := models.Node{
			ID: "node1",
			Name: "Node 1",
			Instance: "pve1",
			Status: "online",
			Disk: models.Disk{
				Usage: 95.0, // Above trigger
			},
		}
		m.CheckNode(node)

		m.mu.RLock()
		alert := m.activeAlerts["node1-disk"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected disk alert")
		}
	})
}

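// TestCheckGuest covers guest-level checks for VMs and containers: disable flags
// and suppression tags, powered-off handling, and per-metric threshold evaluation
// including individual disks and disk/network I/O rates.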
func TestCheckGuest(t *testing.T) {
	// t.Parallel()

	t.Run("returns early when alerts disabled", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.Enabled = false
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			CPU: 0.95,
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts when disabled, got %d", alertCount)
		}
	})

	t.Run("returns early when all guests disabled", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.DisableAllGuests = true
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			CPU: 0.95,
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts when all guests disabled, got %d", alertCount)
		}
	})

	t.Run("handles VM type correctly", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			CPU: 0.95, // 95%
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["vm100-cpu"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected cpu alert for VM")
		}
	})

	t.Run("handles Container type correctly", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		ct := models.Container{
			ID: "ct101",
			Name: "TestCT",
			Node: "node1",
			Status: "running",
			CPU: 0.95, // 95%
		}

		m.CheckGuest(ct, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["ct101-cpu"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected cpu alert for Container")
		}
	})

	t.Run("returns for unsupported guest type", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Pass a string which is unsupported
		m.CheckGuest("invalid", "pve1")

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts for unsupported type, got %d", alertCount)
		}
	})

	t.Run("suppresses alerts with pulse-no-alerts tag", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Pre-create an alert
		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			CPU: 0.95,
			Tags: []string{"pulse-no-alerts"},
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		_, exists := m.activeAlerts["vm100-cpu"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected alert to be suppressed with pulse-no-alerts tag")
		}
	})

	t.Run("stopped guest triggers powered-off check", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Pre-set confirmation count to trigger alert
		m.mu.Lock()
		m.offlineConfirmations["vm100"] = 2
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "stopped",
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["guest-powered-off-vm100"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected powered-off alert for stopped guest")
		}
	})

	t.Run("stopped guest with DisableAllGuestsOffline clears tracking", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.DisableAllGuestsOffline = true
		m.offlineConfirmations["vm100"] = 5
		m.activeAlerts["guest-powered-off-vm100"] = &Alert{
			ID: "guest-powered-off-vm100",
			ResourceID: "vm100",
			Type: "powered-off",
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "stopped",
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		_, alertExists := m.activeAlerts["guest-powered-off-vm100"]
		_, countExists := m.offlineConfirmations["vm100"]
		m.mu.RUnlock()

		if alertExists {
			t.Error("expected powered-off alert to be cleared")
		}
		if countExists {
			t.Error("expected offline count to be cleared")
		}
	})

	t.Run("paused guest clears powered-off alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["guest-powered-off-vm100"] = &Alert{
			ID: "guest-powered-off-vm100",
			ResourceID: "vm100",
			Type: "powered-off",
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "paused",
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		_, exists := m.activeAlerts["guest-powered-off-vm100"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected powered-off alert to be cleared for paused guest")
		}
	})

	t.Run("non-running guest clears metric alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
		}
		m.activeAlerts["vm100-memory"] = &Alert{
			ID: "vm100-memory",
			ResourceID: "vm100",
			Type: "memory",
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "stopped",
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		_, cpuExists := m.activeAlerts["vm100-cpu"]
		_, memExists := m.activeAlerts["vm100-memory"]
		m.mu.RUnlock()

		if cpuExists {
			t.Error("expected cpu alert to be cleared for non-running guest")
		}
		if memExists {
			t.Error("expected memory alert to be cleared for non-running guest")
		}
	})

	t.Run("running guest clears powered-off alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["guest-powered-off-vm100"] = &Alert{
			ID: "guest-powered-off-vm100",
			ResourceID: "vm100",
			Type: "powered-off",
		}
		m.offlineConfirmations["vm100"] = 5
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		_, exists := m.activeAlerts["guest-powered-off-vm100"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected powered-off alert to be cleared for running guest")
		}
	})

	t.Run("disabled thresholds clear existing alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["vm100-cpu"] = &Alert{
			ID: "vm100-cpu",
			ResourceID: "vm100",
			Type: "cpu",
		}
		m.config.Overrides = map[string]ThresholdConfig{
			"vm100": {Disabled: true},
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		_, exists := m.activeAlerts["vm100-cpu"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected alert to be cleared when guest has alerts disabled")
		}
	})

	t.Run("checks memory metric", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			Memory: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			Memory: models.Memory{Usage: 95.0},
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["vm100-memory"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected memory alert")
		}
	})

	t.Run("checks disk metric", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			Disk: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			Disk: models.Disk{Usage: 95.0},
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["vm100-disk"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected disk alert")
		}
	})

	t.Run("checks individual disks", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			Disk: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			Disks: []models.Disk{
				{Mountpoint: "/", Usage: 95.0, Total: 100},
				{Mountpoint: "/data", Usage: 50.0, Total: 100},
			},
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		// Check that alert for high disk was created
		var foundDiskAlert bool
		for alertID := range m.activeAlerts {
			if strings.Contains(alertID, "vm100-disk-") {
				foundDiskAlert = true
				break
			}
		}
		m.mu.RUnlock()

		if !foundDiskAlert {
			t.Fatal("expected individual disk alert")
		}
	})

	t.Run("skips disk with zero total", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			Disk: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			Disks: []models.Disk{
				{Mountpoint: "/", Usage: 95.0, Total: 0}, // Zero total - should skip
			},
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		var foundDiskAlert bool
		for alertID := range m.activeAlerts {
			if strings.Contains(alertID, "vm100-disk-") {
				foundDiskAlert = true
				break
			}
		}
		m.mu.RUnlock()

		if foundDiskAlert {
			t.Error("expected no disk alert for disk with zero total")
		}
	})

	t.Run("skips disk with negative usage", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			Disk: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			Disks: []models.Disk{
				{Mountpoint: "/", Usage: -1.0, Total: 100}, // Negative usage - should skip
			},
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		var foundDiskAlert bool
		for alertID := range m.activeAlerts {
			if strings.Contains(alertID, "vm100-disk-") {
				foundDiskAlert = true
				break
			}
		}
		m.mu.RUnlock()

		if foundDiskAlert {
			t.Error("expected no disk alert for disk with negative usage")
		}
	})

	t.Run("checks diskRead metric", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			DiskRead: &HysteresisThreshold{Trigger: 100.0, Clear: 80.0}, // MB/s
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			DiskRead: 200 * 1024 * 1024, // 200 MB/s in bytes
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["vm100-diskRead"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected diskRead alert")
		}
	})

	t.Run("checks diskWrite metric", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			DiskWrite: &HysteresisThreshold{Trigger: 100.0, Clear: 80.0}, // MB/s
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			DiskWrite: 200 * 1024 * 1024, // 200 MB/s in bytes
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["vm100-diskWrite"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected diskWrite alert")
		}
	})

	t.Run("checks networkIn metric", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			NetworkIn: &HysteresisThreshold{Trigger: 100.0, Clear: 80.0}, // MB/s
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			NetworkIn: 200 * 1024 * 1024, // 200 MB/s in bytes
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["vm100-networkIn"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected networkIn alert")
		}
	})

	t.Run("checks networkOut metric", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			NetworkOut: &HysteresisThreshold{Trigger: 100.0, Clear: 80.0}, // MB/s
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			NetworkOut: 200 * 1024 * 1024, // 200 MB/s in bytes
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		alert := m.activeAlerts["vm100-networkOut"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected networkOut alert")
		}
	})

	t.Run("applies relaxed thresholds with pulse-relaxed tag", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		// CPU at 90% - would trigger normally but relaxed threshold is 95%
		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			CPU: 0.90, // 90%
			Tags: []string{"pulse-relaxed"},
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		_, exists := m.activeAlerts["vm100-cpu"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected no alert due to relaxed thresholds")
		}
	})

	t.Run("disk uses device as label fallback", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.GuestDefaults = ThresholdConfig{
			Disk: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		vm := models.VM{
			ID: "vm100",
			Name: "TestVM",
			Node: "node1",
			Status: "running",
			Disks: []models.Disk{
				{Device: "sda1", Usage: 95.0, Total: 100}, // No mountpoint, has device
			},
		}

		m.CheckGuest(vm, "pve1")

		m.mu.RLock()
		var foundDiskAlert bool
		for alertID := range m.activeAlerts {
			if strings.Contains(alertID, "vm100-disk-") {
				foundDiskAlert = true
				break
			}
		}
		m.mu.RUnlock()

		if !foundDiskAlert {
			t.Fatal("expected disk alert using device as label")
		}
	})

	t.Run("disk uses index as label when no mountpoint or device", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
|
|
m.config.GuestDefaults = ThresholdConfig{
|
|
Disk: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
vm := models.VM{
|
|
ID: "vm100",
|
|
Name: "TestVM",
|
|
Node: "node1",
|
|
Status: "running",
|
|
Disks: []models.Disk{
|
|
{Usage: 95.0, Total: 100}, // No mountpoint or device
|
|
},
|
|
}
|
|
|
|
m.CheckGuest(vm, "pve1")
|
|
|
|
m.mu.RLock()
|
|
var foundDiskAlert bool
|
|
for alertID := range m.activeAlerts {
|
|
if strings.Contains(alertID, "vm100-disk-") {
|
|
foundDiskAlert = true
|
|
break
|
|
}
|
|
}
|
|
m.mu.RUnlock()
|
|
|
|
if !foundDiskAlert {
|
|
t.Fatal("expected disk alert using index as label")
|
|
}
|
|
})
|
|
}
|
|
|
|
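// TestCheckHostComprehensive covers CheckHost: early returns for empty host IDs
// and disabled alerting, override and nil-threshold handling, RAID health states,
// per-disk checks, offline-alert clearing, and tag metadata propagation.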
func TestCheckHostComprehensive(t *testing.T) {
	// t.Parallel()

	t.Run("returns early for empty host ID", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.HostDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "",
			CPUUsage: 95.0,
		}

		m.CheckHost(host)

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts for empty host ID, got %d", alertCount)
		}
	})

	t.Run("returns early when alerts disabled", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)
		m.mu.Lock()
		m.config.Enabled = false
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			CPUUsage: 95.0,
		}

		m.CheckHost(host)

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts when disabled, got %d", alertCount)
		}
	})

	t.Run("DisableAllHosts clears existing alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["host:host1-cpu"] = &Alert{ID: "host:host1-cpu", ResourceID: "host:host1", Type: "cpu"}
		m.activeAlerts["host:host1-memory"] = &Alert{ID: "host:host1-memory", ResourceID: "host:host1", Type: "memory"}
		m.config.DisableAllHosts = true
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			CPUUsage: 95.0,
		}

		m.CheckHost(host)

		m.mu.RLock()
		_, cpuExists := m.activeAlerts["host:host1-cpu"]
		_, memExists := m.activeAlerts["host:host1-memory"]
		m.mu.RUnlock()

		if cpuExists {
			t.Error("expected cpu alert to be cleared")
		}
		if memExists {
			t.Error("expected memory alert to be cleared")
		}
	})

	t.Run("override with Disabled clears alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["host:host1-cpu"] = &Alert{ID: "host:host1-cpu", ResourceID: "host:host1", Type: "cpu"}
		m.config.Overrides = map[string]ThresholdConfig{
			"host1": {Disabled: true},
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			CPUUsage: 95.0,
		}

		m.CheckHost(host)

		m.mu.RLock()
		_, exists := m.activeAlerts["host:host1-cpu"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected alert to be cleared when host has alerts disabled")
		}
	})

	t.Run("clears CPU alerts when threshold nil", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["host:host1-cpu"] = &Alert{ID: "host:host1-cpu", ResourceID: "host:host1", Type: "cpu"}
		m.config.HostDefaults = ThresholdConfig{
			CPU: nil, // No CPU threshold
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			CPUUsage: 95.0,
		}

		m.CheckHost(host)

		m.mu.RLock()
		_, exists := m.activeAlerts["host:host1-cpu"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected CPU alert to be cleared when threshold is nil")
		}
	})

	t.Run("clears memory alerts when threshold nil", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["host:host1-memory"] = &Alert{ID: "host:host1-memory", ResourceID: "host:host1", Type: "memory"}
		m.config.HostDefaults = ThresholdConfig{
			Memory: nil, // No memory threshold
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			Memory: models.Memory{
				Usage: 95.0,
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		_, exists := m.activeAlerts["host:host1-memory"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected memory alert to be cleared when threshold is nil")
		}
	})

	t.Run("clears disk alerts when threshold nil", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		// Disk alert ID format: {resourceID}-disk where resourceID is host:hostID/disk:mountpoint
		alertID := "host:host1/disk:/-disk"
		m.mu.Lock()
		m.activeAlerts[alertID] = &Alert{ID: alertID, ResourceID: "host:host1/disk:/", Type: "disk"}
		m.config.HostDefaults = ThresholdConfig{
			Disk: nil, // No disk threshold
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			Disks: []models.Disk{
				{Mountpoint: "/", Usage: 95.0, Total: 100},
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		_, exists := m.activeAlerts[alertID]
		m.mu.RUnlock()

		if exists {
			t.Error("expected disk alert to be cleared when threshold is nil")
		}
	})

	t.Run("RAID degraded creates critical alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			RAID: []models.HostRAIDArray{
				{
					Device: "/dev/md2", // Note: md0/md1 are skipped for Synology compatibility
					Level: "raid1",
					State: "degraded",
					TotalDevices: 2,
					ActiveDevices: 1,
					FailedDevices: 1,
				},
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		alert := m.activeAlerts["host-host1-raid-md2"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected RAID degraded alert")
		}
		if alert.Level != AlertLevelCritical {
			t.Errorf("expected critical level, got %s", alert.Level)
		}
	})

	t.Run("RAID rebuilding creates warning alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			RAID: []models.HostRAIDArray{
				{
					Device: "/dev/md2", // Note: md0/md1 are skipped for Synology compatibility
					Level: "raid1",
					State: "recovering",
					TotalDevices: 2,
					ActiveDevices: 2,
					FailedDevices: 0,
					RebuildPercent: 50.0,
				},
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		alert := m.activeAlerts["host-host1-raid-md2"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected RAID rebuilding alert")
		}
		if alert.Level != AlertLevelWarning {
			t.Errorf("expected warning level, got %s", alert.Level)
		}
	})

	t.Run("RAID healthy clears alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["host-host1-raid-md2"] = &Alert{
			ID: "host-host1-raid-md2",
			Type: "raid",
			Level: AlertLevelCritical,
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			RAID: []models.HostRAIDArray{
				{
					Device: "/dev/md2", // Note: md0/md1 are skipped for Synology compatibility
					Level: "raid1",
					State: "active",
					TotalDevices: 2,
					ActiveDevices: 2,
					FailedDevices: 0,
				},
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		_, exists := m.activeAlerts["host-host1-raid-md2"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected RAID alert to be cleared for healthy array")
		}
	})

	t.Run("RAID with failed devices triggers degraded", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			RAID: []models.HostRAIDArray{
				{
					Device: "/dev/md2", // Note: md0/md1 are skipped for Synology compatibility
					Level: "raid1",
					State: "active", // State might say active but with failed devices
					TotalDevices: 2,
					ActiveDevices: 1,
					FailedDevices: 1, // This triggers degraded alert
				},
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		alert := m.activeAlerts["host-host1-raid-md2"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected RAID alert for failed devices")
		}
		if alert.Level != AlertLevelCritical {
			t.Errorf("expected critical level for failed devices, got %s", alert.Level)
		}
	})

	t.Run("RAID resync triggers rebuilding alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			RAID: []models.HostRAIDArray{
				{
					Device: "/dev/md2", // Note: md0/md1 are skipped for Synology compatibility
					Level: "raid1",
					State: "resync",
					TotalDevices: 2,
					ActiveDevices: 2,
					FailedDevices: 0,
				},
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		alert := m.activeAlerts["host-host1-raid-md2"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected RAID rebuilding alert for resync")
		}
		if alert.Level != AlertLevelWarning {
			t.Errorf("expected warning level for resync, got %s", alert.Level)
		}
	})

	t.Run("existing RAID alert not duplicated", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		originalTime := time.Now().Add(-1 * time.Hour)
		m.mu.Lock()
		m.activeAlerts["host-host1-raid-md2"] = &Alert{
			ID: "host-host1-raid-md2",
			Type: "raid",
			Level: AlertLevelCritical,
			StartTime: originalTime,
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			RAID: []models.HostRAIDArray{
				{
					Device: "/dev/md2", // Note: md0/md1 are skipped for Synology compatibility
					Level: "raid1",
					State: "degraded",
					TotalDevices: 2,
					ActiveDevices: 1,
					FailedDevices: 1,
				},
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		alert := m.activeAlerts["host-host1-raid-md2"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected RAID alert to still exist")
		}
		// The alert should preserve its original start time
		if !alert.StartTime.Equal(originalTime) {
			t.Error("expected alert start time to be preserved")
		}
	})

	t.Run("applies override thresholds", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.HostDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.config.Overrides = map[string]ThresholdConfig{
			"host1": {
				CPU: &HysteresisThreshold{Trigger: 99.0, Clear: 95.0}, // Higher threshold
			},
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			CPUUsage: 95.0, // Below override trigger
		}

		m.CheckHost(host)

		m.mu.RLock()
		_, exists := m.activeAlerts["host:host1-cpu"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected no alert due to higher override threshold")
		}
	})

	t.Run("checks multiple disks", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.HostDefaults = ThresholdConfig{
			Disk: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			Disks: []models.Disk{
				{Mountpoint: "/", Usage: 95.0, Total: 100},
				{Mountpoint: "/data", Usage: 50.0, Total: 100}, // Below threshold
			},
		}

		m.CheckHost(host)

		m.mu.RLock()
		var diskAlertCount int
		for alertID := range m.activeAlerts {
			// Disk alert ID format: host:hostID/disk:label-disk
			if strings.Contains(alertID, "host:host1/disk:") {
				diskAlertCount++
			}
		}
		m.mu.RUnlock()

		if diskAlertCount != 1 {
			t.Errorf("expected 1 disk alert, got %d", diskAlertCount)
		}
	})

	t.Run("clears offline alert when host comes online", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		resourceKey := hostResourceID("host1")
		m.mu.Lock()
		m.activeAlerts["host-offline-host1"] = &Alert{
			ID: "host-offline-host1",
			Type: "connectivity",
		}
		m.offlineConfirmations[resourceKey] = 5
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
		}

		m.CheckHost(host)

		m.mu.RLock()
		_, alertExists := m.activeAlerts["host-offline-host1"]
		_, countExists := m.offlineConfirmations[resourceKey]
		m.mu.RUnlock()

		if alertExists {
			t.Error("expected offline alert to be cleared")
		}
		if countExists {
			t.Error("expected offline count to be cleared")
		}
	})

	t.Run("includes tags in metadata", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.HostDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		host := models.Host{
			ID: "host1",
			Hostname: "testhost",
			CPUUsage: 95.0,
			Tags: []string{"production", "critical"},
		}

		m.CheckHost(host)

		m.mu.RLock()
		alert := m.activeAlerts["host:host1-cpu"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected CPU alert")
		}
		if alert.Metadata == nil {
			t.Fatal("expected metadata in alert")
		}
		tags, ok := alert.Metadata["tags"].([]string)
		if !ok || len(tags) != 2 {
			t.Error("expected tags in metadata")
		}
	})
}

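// TestCheckPBSComprehensive covers CheckPBS: disabled alerting, per-instance
// overrides, CPU and memory thresholds while online, skipped metrics while
// offline, and offline/connection-health alert creation and clearing.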
func TestCheckPBSComprehensive(t *testing.T) {
	// t.Parallel()

	t.Run("returns early when alerts disabled", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.Enabled = false
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			CPU: 95.0,
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts when disabled, got %d", alertCount)
		}
	})

	t.Run("DisableAllPBS clears existing alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["pbs1-cpu"] = &Alert{ID: "pbs1-cpu", Type: "cpu"}
		m.activeAlerts["pbs1-memory"] = &Alert{ID: "pbs1-memory", Type: "memory"}
		m.activeAlerts["pbs-offline-pbs1"] = &Alert{ID: "pbs-offline-pbs1", Type: "connectivity"}
		m.offlineConfirmations["pbs1"] = 3
		m.config.DisableAllPBS = true
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		_, cpuExists := m.activeAlerts["pbs1-cpu"]
		_, memExists := m.activeAlerts["pbs1-memory"]
		_, offlineExists := m.activeAlerts["pbs-offline-pbs1"]
		_, confirmExists := m.offlineConfirmations["pbs1"]
		m.mu.RUnlock()

		if cpuExists {
			t.Error("expected CPU alert to be cleared")
		}
		if memExists {
			t.Error("expected memory alert to be cleared")
		}
		if offlineExists {
			t.Error("expected offline alert to be cleared")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})

	t.Run("override with Disabled clears alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["pbs1-cpu"] = &Alert{ID: "pbs1-cpu", Type: "cpu"}
		m.activeAlerts["pbs1-memory"] = &Alert{ID: "pbs1-memory", Type: "memory"}
		m.activeAlerts["pbs-offline-pbs1"] = &Alert{ID: "pbs-offline-pbs1", Type: "connectivity"}
		m.offlineConfirmations["pbs1"] = 3
		m.config.Overrides = map[string]ThresholdConfig{
			"pbs1": {Disabled: true},
		}
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		_, cpuExists := m.activeAlerts["pbs1-cpu"]
		_, memExists := m.activeAlerts["pbs1-memory"]
		_, offlineExists := m.activeAlerts["pbs-offline-pbs1"]
		_, confirmExists := m.offlineConfirmations["pbs1"]
		m.mu.RUnlock()

		if cpuExists {
			t.Error("expected CPU alert to be cleared")
		}
		if memExists {
			t.Error("expected memory alert to be cleared")
		}
		if offlineExists {
			t.Error("expected offline alert to be cleared")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})

	t.Run("DisableAllPBSOffline clears offline alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["pbs-offline-pbs1"] = &Alert{ID: "pbs-offline-pbs1", Type: "connectivity"}
		m.offlineConfirmations["pbs1"] = 3
		m.config.DisableAllPBSOffline = true
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Status: "offline",
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		_, offlineExists := m.activeAlerts["pbs-offline-pbs1"]
		_, confirmExists := m.offlineConfirmations["pbs1"]
		m.mu.RUnlock()

		if offlineExists {
			t.Error("expected offline alert to be cleared when DisableAllPBSOffline is true")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})

	t.Run("checks CPU threshold when online", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Host: "pbshost",
			Status: "online",
			CPU: 95.0,
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		alert := m.activeAlerts["pbs1-cpu"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected CPU alert")
		}
	})

	t.Run("checks memory threshold when online", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			Memory: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Host: "pbshost",
			Status: "online",
			Memory: 95.0,
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		alert := m.activeAlerts["pbs1-memory"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected memory alert")
		}
	})

	t.Run("skips metrics when PBS is offline", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
			Memory: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Status: "offline",
			CPU: 95.0,
			Memory: 95.0,
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		_, cpuExists := m.activeAlerts["pbs1-cpu"]
		_, memExists := m.activeAlerts["pbs1-memory"]
		m.mu.RUnlock()

		if cpuExists {
			t.Error("expected no CPU alert when offline")
		}
		if memExists {
			t.Error("expected no memory alert when offline")
		}
	})

	t.Run("applies override thresholds", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.NodeDefaults = ThresholdConfig{
			CPU: &HysteresisThreshold{Trigger: 80.0, Clear: 70.0},
		}
		m.config.Overrides = map[string]ThresholdConfig{
			"pbs1": {
				CPU: &HysteresisThreshold{Trigger: 99.0, Clear: 95.0}, // Higher threshold
			},
		}
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Status: "online",
			CPU: 95.0, // Below override trigger
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		_, exists := m.activeAlerts["pbs1-cpu"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected no alert due to higher override threshold")
		}
	})

	t.Run("checks offline status", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		// Pre-populate confirmation count to bypass waiting period
		m.offlineConfirmations["pbs1"] = 2
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Status: "offline",
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		alert := m.activeAlerts["pbs-offline-pbs1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert")
		}
		if alert.Type != "offline" {
			t.Errorf("expected offline type, got %s", alert.Type)
		}
	})

	t.Run("checks connection health error", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		// Pre-populate confirmation count to bypass waiting period
		m.offlineConfirmations["pbs1"] = 2
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Status: "online",
			ConnectionHealth: "error",
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		alert := m.activeAlerts["pbs-offline-pbs1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert for connection health error")
		}
	})

	t.Run("checks connection health unhealthy", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		// Pre-populate confirmation count to bypass waiting period
		m.offlineConfirmations["pbs1"] = 2
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Status: "online",
			ConnectionHealth: "unhealthy",
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		alert := m.activeAlerts["pbs-offline-pbs1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert for connection health unhealthy")
		}
	})

	t.Run("clears offline alert when back online", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["pbs-offline-pbs1"] = &Alert{ID: "pbs-offline-pbs1", Type: "connectivity"}
		m.offlineConfirmations["pbs1"] = 5
		m.mu.Unlock()

		pbs := models.PBSInstance{
			ID: "pbs1",
			Name: "testpbs",
			Status: "online",
			ConnectionHealth: "healthy",
		}

		m.CheckPBS(pbs)

		m.mu.RLock()
		_, offlineExists := m.activeAlerts["pbs-offline-pbs1"]
		_, confirmExists := m.offlineConfirmations["pbs1"]
		m.mu.RUnlock()

		if offlineExists {
			t.Error("expected offline alert to be cleared when back online")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})
}

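// TestCheckPMGComprehensive covers CheckPMG: disabled alerting, per-instance
// overrides, offline and connection-health alerts, and skipping queue metrics
// while the instance is offline.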
func TestCheckPMGComprehensive(t *testing.T) {
	// t.Parallel()

	t.Run("returns early when alerts disabled", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.Enabled = false
		m.mu.Unlock()

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts when disabled, got %d", alertCount)
		}
	})

	t.Run("DisableAllPMG clears existing alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["pmg1-queue-total"] = &Alert{ID: "pmg1-queue-total", Type: "queue-total"}
		m.activeAlerts["pmg1-queue-deferred"] = &Alert{ID: "pmg1-queue-deferred", Type: "queue-deferred"}
		m.activeAlerts["pmg1-queue-hold"] = &Alert{ID: "pmg1-queue-hold", Type: "queue-hold"}
		m.activeAlerts["pmg1-oldest-message"] = &Alert{ID: "pmg1-oldest-message", Type: "oldest-message"}
		m.activeAlerts["pmg-offline-pmg1"] = &Alert{ID: "pmg-offline-pmg1", Type: "connectivity"}
		m.offlineConfirmations["pmg1"] = 3
		m.config.DisableAllPMG = true
		m.mu.Unlock()

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		_, queueTotalExists := m.activeAlerts["pmg1-queue-total"]
		_, queueDeferredExists := m.activeAlerts["pmg1-queue-deferred"]
		_, queueHoldExists := m.activeAlerts["pmg1-queue-hold"]
		_, oldestMsgExists := m.activeAlerts["pmg1-oldest-message"]
		_, offlineExists := m.activeAlerts["pmg-offline-pmg1"]
		_, confirmExists := m.offlineConfirmations["pmg1"]
		m.mu.RUnlock()

		if queueTotalExists {
			t.Error("expected queue-total alert to be cleared")
		}
		if queueDeferredExists {
			t.Error("expected queue-deferred alert to be cleared")
		}
		if queueHoldExists {
			t.Error("expected queue-hold alert to be cleared")
		}
		if oldestMsgExists {
			t.Error("expected oldest-message alert to be cleared")
		}
		if offlineExists {
			t.Error("expected offline alert to be cleared")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})

	t.Run("override with Disabled clears alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["pmg1-queue-total"] = &Alert{ID: "pmg1-queue-total", Type: "queue-total"}
		m.activeAlerts["pmg1-oldest-message"] = &Alert{ID: "pmg1-oldest-message", Type: "oldest-message"}
		m.activeAlerts["pmg-offline-pmg1"] = &Alert{ID: "pmg-offline-pmg1", Type: "connectivity"}
		m.offlineConfirmations["pmg1"] = 3
		m.config.Overrides = map[string]ThresholdConfig{
			"pmg1": {Disabled: true},
		}
		m.mu.Unlock()

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		_, queueExists := m.activeAlerts["pmg1-queue-total"]
		_, oldestExists := m.activeAlerts["pmg1-oldest-message"]
		_, offlineExists := m.activeAlerts["pmg-offline-pmg1"]
		_, confirmExists := m.offlineConfirmations["pmg1"]
		m.mu.RUnlock()

		if queueExists {
			t.Error("expected queue alert to be cleared")
		}
		if oldestExists {
			t.Error("expected oldest-message alert to be cleared")
		}
		if offlineExists {
			t.Error("expected offline alert to be cleared")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})

	t.Run("DisableAllPMGOffline clears offline alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["pmg-offline-pmg1"] = &Alert{ID: "pmg-offline-pmg1", Type: "connectivity"}
		m.offlineConfirmations["pmg1"] = 3
		m.config.DisableAllPMGOffline = true
		m.mu.Unlock()

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
			Status: "offline",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		_, offlineExists := m.activeAlerts["pmg-offline-pmg1"]
		_, confirmExists := m.offlineConfirmations["pmg1"]
		m.mu.RUnlock()

		if offlineExists {
			t.Error("expected offline alert to be cleared when DisableAllPMGOffline is true")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})

	t.Run("checks offline status", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		// Pre-populate confirmation count to bypass waiting period (3 required)
		m.offlineConfirmations["pmg1"] = 2
		m.mu.Unlock()

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
			Status: "offline",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		alert := m.activeAlerts["pmg-offline-pmg1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert")
		}
		if alert.Type != "offline" {
			t.Errorf("expected offline type, got %s", alert.Type)
		}
	})

	t.Run("checks connection health error", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		// Pre-populate confirmation count to bypass waiting period
		m.offlineConfirmations["pmg1"] = 2
		m.mu.Unlock()

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
			Status: "online",
			ConnectionHealth: "error",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		alert := m.activeAlerts["pmg-offline-pmg1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert for connection health error")
		}
	})

	t.Run("checks connection health unhealthy", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		// Pre-populate confirmation count to bypass waiting period
		m.offlineConfirmations["pmg1"] = 2
		m.mu.Unlock()

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
			Status: "online",
			ConnectionHealth: "unhealthy",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		alert := m.activeAlerts["pmg-offline-pmg1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert for connection health unhealthy")
		}
	})

	t.Run("clears offline alert when back online", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["pmg-offline-pmg1"] = &Alert{ID: "pmg-offline-pmg1", Type: "connectivity"}
		m.offlineConfirmations["pmg1"] = 5
		m.mu.Unlock()

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
			Status: "online",
			ConnectionHealth: "healthy",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		_, offlineExists := m.activeAlerts["pmg-offline-pmg1"]
		_, confirmExists := m.offlineConfirmations["pmg1"]
		m.mu.RUnlock()

		if offlineExists {
			t.Error("expected offline alert to be cleared when back online")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})

	t.Run("skips metrics when PMG is offline", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		pmg := models.PMGInstance{
			ID: "pmg1",
			Name: "testpmg",
			Status: "offline",
		}

		m.CheckPMG(pmg)

		m.mu.RLock()
		var queueAlertCount int
		for alertID := range m.activeAlerts {
			if strings.Contains(alertID, "pmg1-queue") || strings.Contains(alertID, "pmg1-oldest") {
				queueAlertCount++
			}
		}
		m.mu.RUnlock()

		if queueAlertCount != 0 {
			t.Error("expected no queue alerts when offline")
		}
	})
}

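// TestCheckStorageComprehensive covers CheckStorage: disabled alerting,
// usage-threshold and override handling (including legacy shared-storage
// override IDs), and offline/unavailable status alerts and their clearing.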
func TestCheckStorageComprehensive(t *testing.T) {
	// t.Parallel()

	t.Run("returns early when alerts disabled", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.Enabled = false
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "active",
			Usage: 95.0,
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		alertCount := len(m.activeAlerts)
		m.mu.RUnlock()

		if alertCount != 0 {
			t.Errorf("expected no alerts when disabled, got %d", alertCount)
		}
	})

	t.Run("DisableAllStorage clears existing alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["storage1-usage"] = &Alert{ID: "storage1-usage", Type: "usage"}
		m.activeAlerts["storage-offline-storage1"] = &Alert{ID: "storage-offline-storage1", Type: "connectivity"}
		m.config.DisableAllStorage = true
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "active",
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		_, usageExists := m.activeAlerts["storage1-usage"]
		_, offlineExists := m.activeAlerts["storage-offline-storage1"]
		m.mu.RUnlock()

		if usageExists {
			t.Error("expected usage alert to be cleared")
		}
		if offlineExists {
			t.Error("expected offline alert to be cleared")
		}
	})

	t.Run("override with Disabled clears alerts", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["storage1-usage"] = &Alert{ID: "storage1-usage", Type: "usage"}
		m.activeAlerts["storage-offline-storage1"] = &Alert{ID: "storage-offline-storage1", Type: "connectivity"}
		m.config.Overrides = map[string]ThresholdConfig{
			"storage1": {Disabled: true},
		}
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "active",
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		_, usageExists := m.activeAlerts["storage1-usage"]
		_, offlineExists := m.activeAlerts["storage-offline-storage1"]
		m.mu.RUnlock()

		if usageExists {
			t.Error("expected usage alert to be cleared")
		}
		if offlineExists {
			t.Error("expected offline alert to be cleared")
		}
	})

	t.Run("checks usage threshold", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.StorageDefault = HysteresisThreshold{Trigger: 80.0, Clear: 70.0}
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Node: "node1",
			Status: "active",
			Usage: 95.0,
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		alert := m.activeAlerts["storage1-usage"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected usage alert")
		}
	})

	t.Run("applies override threshold", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.StorageDefault = HysteresisThreshold{Trigger: 80.0, Clear: 70.0}
		overrideThreshold := HysteresisThreshold{Trigger: 99.0, Clear: 95.0}
		m.config.Overrides = map[string]ThresholdConfig{
			"storage1": {Usage: &overrideThreshold},
		}
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "active",
			Usage: 95.0, // Below override threshold
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		_, exists := m.activeAlerts["storage1-usage"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected no alert due to higher override threshold")
		}
	})

	t.Run("applies legacy shared storage override threshold", func(t *testing.T) {
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.StorageDefault = HysteresisThreshold{Trigger: 80.0, Clear: 70.0}
		overrideThreshold := HysteresisThreshold{Trigger: 10.0, Clear: 5.0}
		m.config.Overrides = map[string]ThresholdConfig{
			"Main-pve1-ceph-pool": {Usage: &overrideThreshold},
		}
		m.mu.Unlock()

		storage := models.Storage{
			ID: "Main-cluster-ceph-pool",
			Name: "ceph-pool",
			Node: "cluster",
			Instance: "Main",
			Status: "available",
			Usage: 20.0,
			Shared: true,
			Nodes: []string{"pve1", "pve2"},
			NodeIDs: []string{"Main-pve1", "Main-pve2"},
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		alert := m.activeAlerts["Main-cluster-ceph-pool-usage"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected usage alert when legacy shared-storage override matches canonical storage ID")
		}
	})

	t.Run("skips usage check when offline", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.StorageDefault = HysteresisThreshold{Trigger: 80.0, Clear: 70.0}
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "offline",
			Usage: 95.0,
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		_, exists := m.activeAlerts["storage1-usage"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected no usage alert when offline")
		}
	})

	t.Run("skips usage check when unavailable", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.StorageDefault = HysteresisThreshold{Trigger: 80.0, Clear: 70.0}
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "unavailable",
			Usage: 95.0,
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		_, exists := m.activeAlerts["storage1-usage"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected no usage alert when unavailable")
		}
	})

	t.Run("checks offline status", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		// Pre-populate confirmation count (requires 2)
		m.offlineConfirmations["storage1"] = 1
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "offline",
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		alert := m.activeAlerts["storage-offline-storage1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert")
		}
	})

	t.Run("checks unavailable status", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		// Pre-populate confirmation count (requires 2)
		m.offlineConfirmations["storage1"] = 1
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "unavailable",
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		alert := m.activeAlerts["storage-offline-storage1"]
		m.mu.RUnlock()

		if alert == nil {
			t.Fatal("expected offline alert for unavailable status")
		}
	})

	t.Run("clears offline alert when back online", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.activeAlerts["storage-offline-storage1"] = &Alert{ID: "storage-offline-storage1", Type: "connectivity"}
		m.offlineConfirmations["storage1"] = 5
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "active",
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		_, offlineExists := m.activeAlerts["storage-offline-storage1"]
		_, confirmExists := m.offlineConfirmations["storage1"]
		m.mu.RUnlock()

		if offlineExists {
			t.Error("expected offline alert to be cleared when back online")
		}
		if confirmExists {
			t.Error("expected offline confirmation to be cleared")
		}
	})

	t.Run("skips usage check when usage is zero", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		m.mu.Lock()
		m.config.TimeThreshold = 0
		m.config.TimeThresholds = map[string]int{}
		m.config.StorageDefault = HysteresisThreshold{Trigger: 80.0, Clear: 70.0}
		m.mu.Unlock()

		storage := models.Storage{
			ID: "storage1",
			Name: "teststorage",
			Status: "active",
			Usage: 0, // No usage data
		}

		m.CheckStorage(storage)

		m.mu.RLock()
		_, exists := m.activeAlerts["storage1-usage"]
		m.mu.RUnlock()

		if exists {
			t.Error("expected no usage alert when usage is zero")
		}
	})
}

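// TestDispatchAlert covers dispatchAlert: nil callback and nil alert handling,
// pending and snoozed activation states, monitor-only alerts, synchronous and
// asynchronous dispatch, and cloning of the alert before the callback runs.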
func TestDispatchAlert(t *testing.T) {
	// t.Parallel()

	t.Run("returns false when onAlert is nil", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		alert := &Alert{
			ID: "test-alert",
			Type: "cpu",
		}

		result := m.dispatchAlert(alert, false)

		if result {
			t.Error("expected false when onAlert callback is nil")
		}
	})

	t.Run("returns false when alert is nil", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		called := false
		m.SetAlertCallback(func(a *Alert) {
			called = true
		})

		result := m.dispatchAlert(nil, false)

		if result {
			t.Error("expected false when alert is nil")
		}
		if called {
			t.Error("callback should not be called for nil alert")
		}
	})

	t.Run("returns false when activation state is pending", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		called := false
		m.SetAlertCallback(func(a *Alert) {
			called = true
		})

		m.mu.Lock()
		m.config.ActivationState = ActivationPending
		m.mu.Unlock()

		alert := &Alert{
			ID: "test-alert",
			Type: "cpu",
		}

		result := m.dispatchAlert(alert, false)

		if result {
			t.Error("expected false when activation is pending")
		}
		if called {
			t.Error("callback should not be called when pending")
		}
	})

	t.Run("returns false when activation state is snoozed", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		called := false
		m.SetAlertCallback(func(a *Alert) {
			called = true
		})

		m.mu.Lock()
		m.config.ActivationState = ActivationSnoozed
		m.mu.Unlock()

		alert := &Alert{
			ID: "test-alert",
			Type: "cpu",
		}

		result := m.dispatchAlert(alert, false)

		if result {
			t.Error("expected false when activation is snoozed")
		}
		if called {
			t.Error("callback should not be called when snoozed")
		}
	})

	t.Run("returns false for monitor-only alert", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		called := false
		m.SetAlertCallback(func(a *Alert) {
			called = true
		})

		m.mu.Lock()
		m.config.ActivationState = ActivationActive
		m.mu.Unlock()

		alert := &Alert{
			ID: "test-alert",
			Type: "cpu",
			Metadata: map[string]interface{}{"monitorOnly": true},
		}

		result := m.dispatchAlert(alert, false)

		if result {
			t.Error("expected false for monitor-only alert")
		}
		if called {
			t.Error("callback should not be called for monitor-only alert")
		}
	})

	t.Run("dispatches synchronously when async is false", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		var receivedAlert *Alert
		m.SetAlertCallback(func(a *Alert) {
			receivedAlert = a
		})

		m.mu.Lock()
		m.config.ActivationState = ActivationActive
		m.mu.Unlock()

		alert := &Alert{
			ID: "test-alert",
			Type: "cpu",
			ResourceName: "testvm",
		}

		result := m.dispatchAlert(alert, false)

		if !result {
			t.Error("expected true for successful dispatch")
		}
		if receivedAlert == nil {
			t.Fatal("callback should have been called")
		}
		if receivedAlert.ID != alert.ID {
			t.Error("alert ID should match")
		}
	})

	t.Run("dispatches asynchronously when async is true", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		var receivedAlert *Alert
		done := make(chan struct{})
		m.SetAlertCallback(func(a *Alert) {
			receivedAlert = a
			close(done)
		})

		m.mu.Lock()
		m.config.ActivationState = ActivationActive
		m.mu.Unlock()

		alert := &Alert{
			ID: "test-alert",
			Type: "cpu",
			ResourceName: "testvm",
		}

		result := m.dispatchAlert(alert, true)

		if !result {
			t.Error("expected true for successful dispatch")
		}

		// Wait for async callback
		select {
		case <-done:
			// Success
		case <-time.After(time.Second):
			t.Fatal("async callback not called within timeout")
		}

		if receivedAlert == nil {
			t.Fatal("callback should have been called")
		}
		if receivedAlert.ID != alert.ID {
			t.Error("alert ID should match")
		}
	})

	t.Run("clones alert before dispatch", func(t *testing.T) {
		// t.Parallel()
		m := newTestManager(t)

		var receivedAlert *Alert
		m.SetAlertCallback(func(a *Alert) {
			receivedAlert = a
		})

		m.mu.Lock()
		m.config.ActivationState = ActivationActive
		m.mu.Unlock()

		alert := &Alert{
			ID: "test-alert",
			Type: "cpu",
			ResourceName: "testvm",
		}

		m.dispatchAlert(alert, false)

		if receivedAlert == alert {
			t.Error("alert should be cloned, not passed directly")
		}
	})
}

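// TestPreserveAlertState covers preserveAlertState: carrying acknowledgement
// and escalation state over from an existing alert, falling back to ackState
// for new alerts, and leaving brand-new alerts untouched.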
func TestPreserveAlertState(t *testing.T) {
	t.Run("nil updated alert is handled", func(t *testing.T) {
		m := newTestManager(t)
		// Should not panic
		m.preserveAlertState("test-id", nil)
	})

	t.Run("preserves state from existing alert", func(t *testing.T) {
		m := newTestManager(t)

		ackTime := time.Now().Add(-30 * time.Minute)
		existing := &Alert{
			ID: "test-alert",
			Type: "cpu",
			StartTime: time.Now().Add(-1 * time.Hour),
			Acknowledged: true,
			AckUser: "testuser",
			AckTime: &ackTime,
			LastEscalation: 2,
			EscalationTimes: []time.Time{time.Now().Add(-25 * time.Minute)},
		}

		m.mu.Lock()
		m.activeAlerts["test-alert"] = existing
		m.mu.Unlock()

		updated := &Alert{
			ID: "test-alert",
			Type: "cpu",
			StartTime: time.Now(), // Different start time
		}

		m.preserveAlertState("test-alert", updated)

		if !updated.StartTime.Equal(existing.StartTime) {
			t.Error("StartTime should be preserved from existing alert")
		}
		if !updated.Acknowledged {
			t.Error("Acknowledged should be preserved")
		}
		if updated.AckUser != "testuser" {
			t.Errorf("AckUser should be preserved, got %s", updated.AckUser)
		}
		if updated.AckTime == nil || !updated.AckTime.Equal(ackTime) {
			t.Error("AckTime should be preserved")
		}
		if updated.LastEscalation != 2 {
			t.Error("LastEscalation should be preserved")
		}
		if len(updated.EscalationTimes) != 1 {
			t.Error("EscalationTimes should be preserved")
		}
	})

	t.Run("falls back to ackState for new alert", func(t *testing.T) {
		m := newTestManager(t)

		ackTime := time.Now().Add(-15 * time.Minute)
		m.mu.Lock()
		m.ackState["test-alert"] = ackRecord{
			acknowledged: true,
			user: "fallbackuser",
			time: ackTime,
		}
		m.mu.Unlock()

		updated := &Alert{
			ID: "test-alert",
			Type: "cpu",
			StartTime: time.Now(),
		}

		m.preserveAlertState("test-alert", updated)

		if !updated.Acknowledged {
			t.Error("Acknowledged should be set from ackState")
		}
		if updated.AckUser != "fallbackuser" {
			t.Errorf("AckUser should be from ackState, got %s", updated.AckUser)
		}
		if updated.AckTime == nil || !updated.AckTime.Equal(ackTime) {
			t.Error("AckTime should be from ackState")
		}
	})

	t.Run("no state to preserve for new alert", func(t *testing.T) {
		m := newTestManager(t)

		startTime := time.Now()
		updated := &Alert{
			ID: "new-alert",
			Type: "cpu",
			StartTime: startTime,
		}

		m.preserveAlertState("new-alert", updated)

		if !updated.StartTime.Equal(startTime) {
			t.Error("StartTime should remain unchanged for new alert")
		}
		if updated.Acknowledged {
			t.Error("Acknowledged should remain false for new alert")
		}
	})
}

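// TestCheckPMGQuarantineBacklog covers quarantine backlog alerts: clearing on
// nil quarantine data, warning and critical absolute thresholds, growth-rate
// thresholds, updates to existing alerts, and virus-queue alerts.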
func TestCheckPMGQuarantineBacklog(t *testing.T) {
|
|
t.Run("nil quarantine clears alerts", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
// Create an existing quarantine alert
|
|
m.mu.Lock()
|
|
m.activeAlerts["pmg1-quarantine-spam"] = &Alert{
|
|
ID: "pmg1-quarantine-spam",
|
|
Type: "quarantine-spam",
|
|
}
|
|
m.activeAlerts["pmg1-quarantine-virus"] = &Alert{
|
|
ID: "pmg1-quarantine-virus",
|
|
Type: "quarantine-virus",
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "pmg-server",
|
|
Host: "pmg.example.com",
|
|
Quarantine: nil,
|
|
}
|
|
|
|
m.checkPMGQuarantineBacklog(pmg, PMGThresholdConfig{})
|
|
|
|
m.mu.RLock()
|
|
_, spamExists := m.activeAlerts["pmg1-quarantine-spam"]
|
|
_, virusExists := m.activeAlerts["pmg1-quarantine-virus"]
|
|
m.mu.RUnlock()
|
|
|
|
if spamExists {
|
|
t.Error("spam alert should be cleared when quarantine is nil")
|
|
}
|
|
if virusExists {
|
|
t.Error("virus alert should be cleared when quarantine is nil")
|
|
}
|
|
})
|
|
|
|
t.Run("warning threshold triggers alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
m.mu.Lock()
|
|
m.pmgQuarantineHistory = make(map[string][]pmgQuarantineSnapshot)
|
|
m.config.ActivationState = ActivationActive
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "pmg-server",
|
|
Host: "pmg.example.com",
|
|
Quarantine: &models.PMGQuarantineTotals{
|
|
Spam: 2500, // Above warning threshold
|
|
Virus: 100,
|
|
},
|
|
}
|
|
|
|
thresholds := PMGThresholdConfig{
|
|
QuarantineSpamWarn: 2000,
|
|
QuarantineSpamCritical: 5000,
|
|
QuarantineVirusWarn: 2000,
|
|
QuarantineVirusCritical: 5000,
|
|
}
|
|
|
|
m.checkPMGQuarantineBacklog(pmg, thresholds)
|
|
|
|
m.mu.RLock()
|
|
alert, exists := m.activeAlerts["pmg1-quarantine-spam"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("spam quarantine warning alert should be created")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("alert level should be warning, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("critical threshold triggers alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
m.mu.Lock()
|
|
m.pmgQuarantineHistory = make(map[string][]pmgQuarantineSnapshot)
|
|
m.config.ActivationState = ActivationActive
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "pmg-server",
|
|
Host: "pmg.example.com",
|
|
Quarantine: &models.PMGQuarantineTotals{
|
|
Spam: 6000, // Above critical threshold
|
|
Virus: 100,
|
|
},
|
|
}
|
|
|
|
thresholds := PMGThresholdConfig{
|
|
QuarantineSpamWarn: 2000,
|
|
QuarantineSpamCritical: 5000,
|
|
QuarantineVirusWarn: 2000,
|
|
QuarantineVirusCritical: 5000,
|
|
}
|
|
|
|
m.checkPMGQuarantineBacklog(pmg, thresholds)
|
|
|
|
m.mu.RLock()
|
|
alert, exists := m.activeAlerts["pmg1-quarantine-spam"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("spam quarantine critical alert should be created")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("alert level should be critical, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
t.Run("below threshold clears alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
m.mu.Lock()
|
|
m.pmgQuarantineHistory = make(map[string][]pmgQuarantineSnapshot)
|
|
m.activeAlerts["pmg1-quarantine-spam"] = &Alert{
|
|
ID: "pmg1-quarantine-spam",
|
|
Type: "quarantine-spam",
|
|
Level: AlertLevelWarning,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "pmg-server",
|
|
Host: "pmg.example.com",
|
|
Quarantine: &models.PMGQuarantineTotals{
|
|
Spam: 500, // Below warning threshold
|
|
Virus: 100,
|
|
},
|
|
}
|
|
|
|
thresholds := PMGThresholdConfig{
|
|
QuarantineSpamWarn: 2000,
|
|
QuarantineSpamCritical: 5000,
|
|
}
|
|
|
|
m.checkPMGQuarantineBacklog(pmg, thresholds)
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["pmg1-quarantine-spam"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("spam quarantine alert should be cleared when below threshold")
|
|
}
|
|
})
|
|
|
|
t.Run("growth rate triggers warning alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
m.mu.Lock()
|
|
m.config.ActivationState = ActivationActive
|
|
// Set up history from ~2 hours ago
|
|
m.pmgQuarantineHistory = map[string][]pmgQuarantineSnapshot{
|
|
"pmg1": {
|
|
{
|
|
Spam: 1000,
|
|
Virus: 100,
|
|
Timestamp: time.Now().Add(-2 * time.Hour),
|
|
},
|
|
},
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "pmg-server",
|
|
Host: "pmg.example.com",
|
|
Quarantine: &models.PMGQuarantineTotals{
|
|
Spam: 1500, // 50% growth (500 messages)
|
|
Virus: 100,
|
|
},
|
|
}
|
|
|
|
thresholds := PMGThresholdConfig{
|
|
QuarantineSpamWarn: 10000, // High absolute threshold (won't trigger)
|
|
QuarantineSpamCritical: 20000,
|
|
QuarantineGrowthWarnPct: 25, // 25% growth warning
|
|
QuarantineGrowthWarnMin: 250, // Minimum 250 messages
|
|
QuarantineGrowthCritPct: 50, // 50% growth critical
|
|
QuarantineGrowthCritMin: 500, // Minimum 500 messages
|
|
}
|
|
|
|
m.checkPMGQuarantineBacklog(pmg, thresholds)
|
|
|
|
m.mu.RLock()
|
|
alert, exists := m.activeAlerts["pmg1-quarantine-spam"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("spam quarantine growth alert should be created")
|
|
}
|
|
if alert.Level != AlertLevelCritical {
|
|
t.Errorf("alert level should be critical due to 50%% growth + 500 messages, got %s", alert.Level)
|
|
}
|
|
})
|
|
|
|
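	// An existing alert for the same resource should have its value refreshed in place when the
	// backlog changes, rather than being duplicated.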
t.Run("updates existing alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
m.mu.Lock()
|
|
m.pmgQuarantineHistory = make(map[string][]pmgQuarantineSnapshot)
|
|
m.config.ActivationState = ActivationActive
|
|
m.activeAlerts["pmg1-quarantine-spam"] = &Alert{
|
|
ID: "pmg1-quarantine-spam",
|
|
Type: "quarantine-spam",
|
|
Level: AlertLevelWarning,
|
|
Value: 2500,
|
|
Threshold: 2000,
|
|
LastSeen: time.Now().Add(-5 * time.Minute),
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "pmg-server",
|
|
Host: "pmg.example.com",
|
|
Quarantine: &models.PMGQuarantineTotals{
|
|
Spam: 3000, // Higher spam count
|
|
Virus: 100,
|
|
},
|
|
}
|
|
|
|
thresholds := PMGThresholdConfig{
|
|
QuarantineSpamWarn: 2000,
|
|
QuarantineSpamCritical: 5000,
|
|
}
|
|
|
|
m.checkPMGQuarantineBacklog(pmg, thresholds)
|
|
|
|
m.mu.RLock()
|
|
alert, exists := m.activeAlerts["pmg1-quarantine-spam"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("spam quarantine alert should still exist")
|
|
}
|
|
if alert.Value != 3000 {
|
|
t.Errorf("alert value should be updated to 3000, got %.0f", alert.Value)
|
|
}
|
|
})
|
|
|
|
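	// The virus quarantine counter uses its own thresholds and its own alert ID, independent of spam.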
t.Run("virus quarantine alert", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
m.mu.Lock()
|
|
m.pmgQuarantineHistory = make(map[string][]pmgQuarantineSnapshot)
|
|
m.config.ActivationState = ActivationActive
|
|
m.mu.Unlock()
|
|
|
|
pmg := models.PMGInstance{
|
|
ID: "pmg1",
|
|
Name: "pmg-server",
|
|
Host: "pmg.example.com",
|
|
Quarantine: &models.PMGQuarantineTotals{
|
|
Spam: 100,
|
|
Virus: 3000, // Above virus warning threshold
|
|
},
|
|
}
|
|
|
|
thresholds := PMGThresholdConfig{
|
|
QuarantineSpamWarn: 2000,
|
|
QuarantineSpamCritical: 5000,
|
|
QuarantineVirusWarn: 2000,
|
|
QuarantineVirusCritical: 5000,
|
|
}
|
|
|
|
m.checkPMGQuarantineBacklog(pmg, thresholds)
|
|
|
|
m.mu.RLock()
|
|
alert, exists := m.activeAlerts["pmg1-quarantine-virus"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("virus quarantine warning alert should be created")
|
|
}
|
|
if alert.Level != AlertLevelWarning {
|
|
t.Errorf("alert level should be warning, got %s", alert.Level)
|
|
}
|
|
})
|
|
}
|
|
|
|
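// TestLoadActiveAlerts exercises LoadActiveAlerts: the round-trip happy path via
// SaveActiveAlerts, expiry of stale and long-acknowledged alerts, restoration of
// acknowledgment state, invalid JSON handling, duplicate alert IDs, and migration of
// legacy guest resource IDs to the canonical format.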
func TestLoadActiveAlerts(t *testing.T) {
	t.Run("no file returns nil error", func(t *testing.T) {
		m := newTestManager(t)
		m.ClearActiveAlerts()

		err := m.LoadActiveAlerts()
		if err != nil {
			t.Errorf("expected no error when file doesn't exist, got %v", err)
		}
	})

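	// Round-trip: save an alert, wipe the in-memory map, and confirm LoadActiveAlerts restores it.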
t.Run("loads alerts from valid file", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
// Create an alert and save it
|
|
startTime := time.Now().Add(-30 * time.Minute)
|
|
alert := &Alert{
|
|
ID: "test-load-alert",
|
|
Type: "cpu",
|
|
Level: AlertLevelWarning,
|
|
ResourceID: "test-resource",
|
|
ResourceName: "test-vm",
|
|
Node: "node1",
|
|
Instance: "pve1",
|
|
Message: "Test alert",
|
|
Value: 85.0,
|
|
Threshold: 80.0,
|
|
StartTime: startTime,
|
|
LastSeen: time.Now(),
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts[alert.ID] = alert
|
|
m.mu.Unlock()
|
|
|
|
// Save to disk
|
|
_ = m.SaveActiveAlerts()
|
|
|
|
// Clear in-memory map only (don't use ClearActiveAlerts which triggers async save)
|
|
m.mu.Lock()
|
|
m.activeAlerts = make(map[string]*Alert)
|
|
m.mu.Unlock()
|
|
|
|
err := m.LoadActiveAlerts()
|
|
if err != nil {
|
|
t.Fatalf("failed to load alerts: %v", err)
|
|
}
|
|
|
|
m.mu.RLock()
|
|
loaded, exists := m.activeAlerts["test-load-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("alert should be loaded from file")
|
|
}
|
|
if loaded.Type != "cpu" {
|
|
t.Errorf("loaded alert type should be cpu, got %s", loaded.Type)
|
|
}
|
|
if loaded.Value != 85.0 {
|
|
t.Errorf("loaded alert value should be 85.0, got %.1f", loaded.Value)
|
|
}
|
|
})
|
|
|
|
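	// Alerts older than 24 hours (here both StartTime and LastSeen are 25h in the past) are
	// treated as stale and dropped on load.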
t.Run("skips old alerts", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
// Create an old alert (>24 hours)
|
|
startTime := time.Now().Add(-25 * time.Hour)
|
|
alert := &Alert{
|
|
ID: "old-alert",
|
|
Type: "cpu",
|
|
Level: AlertLevelWarning,
|
|
ResourceID: "test-resource",
|
|
ResourceName: "test-vm",
|
|
StartTime: startTime,
|
|
LastSeen: startTime,
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts[alert.ID] = alert
|
|
m.mu.Unlock()
|
|
|
|
// Save to disk
|
|
_ = m.SaveActiveAlerts()
|
|
|
|
// Clear in-memory map only (don't use ClearActiveAlerts which triggers async save)
|
|
m.mu.Lock()
|
|
m.activeAlerts = make(map[string]*Alert)
|
|
m.mu.Unlock()
|
|
|
|
err := m.LoadActiveAlerts()
|
|
if err != nil {
|
|
t.Fatalf("failed to load alerts: %v", err)
|
|
}
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["old-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("old alert (>24h) should be skipped during load")
|
|
}
|
|
})
|
|
|
|
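	// An alert acknowledged more than an hour ago is not restored into activeAlerts, but its
	// acknowledgment record must survive in ackState so a reappearing alert does not re-fire.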
t.Run("skips old acknowledged alerts", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
// Create an alert acknowledged >1 hour ago
|
|
startTime := time.Now().Add(-30 * time.Minute)
|
|
ackTime := time.Now().Add(-2 * time.Hour)
|
|
alert := &Alert{
|
|
ID: "old-ack-alert",
|
|
Type: "cpu",
|
|
Level: AlertLevelWarning,
|
|
ResourceID: "test-resource",
|
|
ResourceName: "test-vm",
|
|
StartTime: startTime,
|
|
LastSeen: time.Now(),
|
|
Acknowledged: true,
|
|
AckTime: &ackTime,
|
|
AckUser: "testuser",
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts[alert.ID] = alert
|
|
m.mu.Unlock()
|
|
|
|
// Save to disk
|
|
_ = m.SaveActiveAlerts()
|
|
|
|
// Clear in-memory map only (don't use ClearActiveAlerts which triggers async save)
|
|
m.mu.Lock()
|
|
m.activeAlerts = make(map[string]*Alert)
|
|
m.mu.Unlock()
|
|
|
|
err := m.LoadActiveAlerts()
|
|
if err != nil {
|
|
t.Fatalf("failed to load alerts: %v", err)
|
|
}
|
|
|
|
m.mu.RLock()
|
|
_, exists := m.activeAlerts["old-ack-alert"]
|
|
ackRecord, ackExists := m.ackState["old-ack-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if exists {
|
|
t.Error("old acknowledged alert (>1h) should be skipped from activeAlerts")
|
|
}
|
|
|
|
// But ackState should be preserved so the alert doesn't retrigger if it reappears
|
|
if !ackExists {
|
|
t.Error("ackState should be preserved for old acknowledged alerts to prevent retriggering")
|
|
}
|
|
if ackExists && !ackRecord.acknowledged {
|
|
t.Error("ackState.acknowledged should be true")
|
|
}
|
|
if ackExists && ackRecord.user != "testuser" {
|
|
t.Errorf("ackState.user should be 'testuser', got %q", ackRecord.user)
|
|
}
|
|
})
|
|
|
|
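	// A recently acknowledged alert should come back with both the Acknowledged flag set on the
	// alert itself and a matching entry in ackState.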
t.Run("restores acknowledgment state", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
|
|
// Create an acknowledged alert
|
|
startTime := time.Now().Add(-10 * time.Minute)
|
|
ackTime := time.Now().Add(-5 * time.Minute)
|
|
alert := &Alert{
|
|
ID: "ack-alert",
|
|
Type: "cpu",
|
|
Level: AlertLevelWarning,
|
|
ResourceID: "test-resource",
|
|
ResourceName: "test-vm",
|
|
StartTime: startTime,
|
|
LastSeen: time.Now(),
|
|
Acknowledged: true,
|
|
AckTime: &ackTime,
|
|
AckUser: "testuser",
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.activeAlerts[alert.ID] = alert
|
|
m.mu.Unlock()
|
|
|
|
// Save to disk
|
|
_ = m.SaveActiveAlerts()
|
|
|
|
// Clear in-memory maps only (don't use ClearActiveAlerts which triggers async save)
|
|
m.mu.Lock()
|
|
m.activeAlerts = make(map[string]*Alert)
|
|
m.ackState = make(map[string]ackRecord)
|
|
m.mu.Unlock()
|
|
|
|
err := m.LoadActiveAlerts()
|
|
if err != nil {
|
|
t.Fatalf("failed to load alerts: %v", err)
|
|
}
|
|
|
|
m.mu.RLock()
|
|
loaded, exists := m.activeAlerts["ack-alert"]
|
|
ackRecord, hasAckRecord := m.ackState["ack-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("acknowledged alert should be loaded")
|
|
}
|
|
if !loaded.Acknowledged {
|
|
t.Error("loaded alert should be acknowledged")
|
|
}
|
|
if loaded.AckUser != "testuser" {
|
|
t.Errorf("loaded alert AckUser should be testuser, got %s", loaded.AckUser)
|
|
}
|
|
if !hasAckRecord {
|
|
t.Error("ackState should be restored for acknowledged alert")
|
|
}
|
|
if !ackRecord.acknowledged {
|
|
t.Error("ackState should show acknowledged=true")
|
|
}
|
|
})
|
|
|
|
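	// Corrupt persisted state should surface as an error rather than being silently ignored.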
t.Run("invalid JSON returns error", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
// Write invalid JSON to the alerts file
|
|
alertsDir := filepath.Join(utils.GetDataDir(), "alerts")
|
|
if err := os.MkdirAll(alertsDir, 0755); err != nil {
|
|
t.Fatalf("failed to create alerts dir: %v", err)
|
|
}
|
|
|
|
alertsFile := filepath.Join(alertsDir, "active-alerts.json")
|
|
if err := os.WriteFile(alertsFile, []byte("invalid json"), 0644); err != nil {
|
|
t.Fatalf("failed to write invalid json: %v", err)
|
|
}
|
|
|
|
err := m.LoadActiveAlerts()
|
|
if err == nil {
|
|
t.Error("expected error for invalid JSON")
|
|
}
|
|
})
|
|
|
|
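	// If the persisted file somehow contains two alerts with the same ID, the first entry wins.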
t.Run("skips duplicate alerts", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
// Write JSON with duplicate alert IDs
|
|
alertsDir := filepath.Join(utils.GetDataDir(), "alerts")
|
|
if err := os.MkdirAll(alertsDir, 0755); err != nil {
|
|
t.Fatalf("failed to create alerts dir: %v", err)
|
|
}
|
|
|
|
startTime := time.Now().Add(-10 * time.Minute)
|
|
alerts := []Alert{
|
|
{ID: "dup-alert", Type: "cpu", StartTime: startTime, LastSeen: time.Now()},
|
|
{ID: "dup-alert", Type: "memory", StartTime: startTime, LastSeen: time.Now()},
|
|
}
|
|
|
|
data, _ := json.Marshal(alerts)
|
|
alertsFile := filepath.Join(alertsDir, "active-alerts.json")
|
|
if err := os.WriteFile(alertsFile, data, 0644); err != nil {
|
|
t.Fatalf("failed to write alerts json: %v", err)
|
|
}
|
|
|
|
err := m.LoadActiveAlerts()
|
|
if err != nil {
|
|
t.Fatalf("failed to load alerts: %v", err)
|
|
}
|
|
|
|
m.mu.RLock()
|
|
alert, exists := m.activeAlerts["dup-alert"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("alert should exist after load")
|
|
}
|
|
// First one wins
|
|
if alert.Type != "cpu" {
|
|
t.Errorf("first alert should win, got type %s", alert.Type)
|
|
}
|
|
})
|
|
|
|
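	// Alerts persisted under the legacy "instance-vmid" resource ID format should be rewritten to
	// the canonical key produced by BuildGuestKey, leaving no stale entry under the old ID.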
t.Run("migrates legacy guest alert resource IDs to canonical format", func(t *testing.T) {
|
|
m := newTestManager(t)
|
|
m.ClearActiveAlerts()
|
|
|
|
alertsDir := filepath.Join(utils.GetDataDir(), "alerts")
|
|
if err := os.MkdirAll(alertsDir, 0755); err != nil {
|
|
t.Fatalf("failed to create alerts dir: %v", err)
|
|
}
|
|
|
|
startTime := time.Now().Add(-10 * time.Minute)
|
|
legacyResourceID := "pve1-100"
|
|
canonicalResourceID := BuildGuestKey("pve1", "node1", 100)
|
|
alerts := []Alert{
|
|
{
|
|
ID: legacyResourceID + "-cpu",
|
|
Type: "cpu",
|
|
Level: AlertLevelWarning,
|
|
ResourceID: legacyResourceID,
|
|
ResourceName: "test-vm",
|
|
Node: "node1",
|
|
Instance: "pve1",
|
|
StartTime: startTime,
|
|
LastSeen: time.Now(),
|
|
},
|
|
}
|
|
|
|
data, _ := json.Marshal(alerts)
|
|
alertsFile := filepath.Join(alertsDir, "active-alerts.json")
|
|
if err := os.WriteFile(alertsFile, data, 0644); err != nil {
|
|
t.Fatalf("failed to write alerts json: %v", err)
|
|
}
|
|
|
|
err := m.LoadActiveAlerts()
|
|
if err != nil {
|
|
t.Fatalf("failed to load alerts: %v", err)
|
|
}
|
|
|
|
m.mu.RLock()
|
|
alert, exists := m.activeAlerts[canonicalResourceID+"-cpu"]
|
|
_, oldExists := m.activeAlerts[legacyResourceID+"-cpu"]
|
|
m.mu.RUnlock()
|
|
|
|
if !exists {
|
|
t.Fatal("expected canonical guest alert to be loaded")
|
|
}
|
|
if oldExists {
|
|
t.Fatal("expected legacy guest alert ID to be replaced")
|
|
}
|
|
if alert.ResourceID != canonicalResourceID {
|
|
t.Fatalf("expected resource ID %q, got %q", canonicalResourceID, alert.ResourceID)
|
|
}
|
|
if alert.ID != canonicalResourceID+"-cpu" {
|
|
t.Fatalf("expected alert ID %q, got %q", canonicalResourceID+"-cpu", alert.ID)
|
|
}
|
|
})
|
|
}
|
|
|
|
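// TestNamespaceMatchesInstance is a table-driven test for namespaceMatchesInstance. As the cases
// below document, matching is case-insensitive, ignores separator characters such as '-' and '_',
// and succeeds when one normalized value is a suffix of the other; the "no match" groups cover
// substrings that are not suffixes, unrelated names, and empty inputs (see issue #1095 for the
// motivating scenarios).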
func TestNamespaceMatchesInstance(t *testing.T) {
	tests := []struct {
		name      string
		namespace string
		instance  string
		expected  bool
	}{
		// Exact matches
		{"exact match", "pve", "pve", true},
		{"exact match with numbers", "pve1", "pve1", true},

		// Suffix matches (namespace is suffix of instance)
		{"namespace suffix of instance", "nat", "pve-nat", true},
		{"namespace suffix of instance no dash", "nat", "pvenat", true},

		// Suffix matches (instance is suffix of namespace)
		{"instance suffix of namespace", "pvebackups", "pve", false},  // "pve" is not suffix of "pvebackups"
		{"instance suffix of namespace 2", "backupspve", "pve", true}, // "pve" IS suffix of "backupspve"

		// Case insensitive
		{"case insensitive exact", "PVE", "pve", true},
		{"case insensitive suffix", "NAT", "pve-nat", true},

		// Special characters ignored
		{"special chars in namespace", "pve_nat", "pvenat", true},
		{"special chars in instance", "pvenat", "pve-nat", true},
		{"both have special chars", "pve-1", "pve_1", true},

		// No matches - substring but not suffix
		{"no match substring not suffix", "production", "my-production-server", false}, // "production" is not suffix of "myproductionserver"
		{"no match pve not suffix of pvenat", "pve", "pve-nat", false},                 // "pve" is not suffix of "pvenat"

		// No matches
		{"no match", "production", "staging", false},
		{"no match different names", "pve1", "pve2", false},
		{"no match partial mismatch", "abc", "xyz", false},

		// Empty values
		{"empty namespace", "", "pve", false},
		{"empty instance", "pve", "", false},
		{"both empty", "", "", false},

		// Real-world scenarios from issue #1095
		{"pve namespace with pve instance", "pve", "pve", true},
		{"nat namespace with pve-nat instance", "nat", "pve-nat", true},
		{"pve1 namespace with pve1 instance", "pve1", "pve1", true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := namespaceMatchesInstance(tt.namespace, tt.instance)
			if result != tt.expected {
				t.Errorf("namespaceMatchesInstance(%q, %q) = %v, want %v",
					tt.namespace, tt.instance, result, tt.expected)
			}
		})
	}
}