From 02273e7fcb97d7b2abc849bad166ce2a05c33643 Mon Sep 17 00:00:00 2001
From: rcourtman
Date: Tue, 11 Nov 2025 23:52:24 +0000
Subject: [PATCH] Fix monitoring test panic and goroutine leaks

Two critical fixes to prevent test timeouts:

1. Nil map panic in TestPollPVEInstanceUsesRRDMemUsedFallback:
   - Test monitor was missing nodeLastOnline map initialization
   - Panic occurred when pollPVEInstance tried to update nodeLastOnline[nodeID]
   - Caused deadlock when panic recovery tried to acquire already-held mutex
   - Added nodeLastOnline: make(map[string]time.Time) to test monitor

2. Alert manager goroutine leak in Docker tests:
   - newTestMonitor() created alert manager but never stopped it
   - Background goroutines (escalationChecker, periodicSaveAlerts) kept running
   - Added t.Cleanup(func() { m.alertManager.Stop() }) to test helper

These fixes resolve the 10+ minute test timeouts in CI workflows.

Related to workflow run 19281508603.
---
 internal/monitoring/monitor_docker_test.go | 4 +++-
 internal/monitoring/monitor_memory_test.go | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/internal/monitoring/monitor_docker_test.go b/internal/monitoring/monitor_docker_test.go
index 33547e213..575c58567 100644
--- a/internal/monitoring/monitor_docker_test.go
+++ b/internal/monitoring/monitor_docker_test.go
@@ -13,7 +13,7 @@ import (
 
 func newTestMonitor(t *testing.T) *Monitor {
 	t.Helper()
-	return &Monitor{
+	m := &Monitor{
 		state: models.NewState(),
 		alertManager: alerts.NewManager(),
 		removedDockerHosts: make(map[string]time.Time),
@@ -21,6 +21,8 @@ func newTestMonitor(t *testing.T) *Monitor {
 		dockerTokenBindings: make(map[string]string),
 		dockerMetadataStore: config.NewDockerMetadataStore(t.TempDir()),
 	}
+	t.Cleanup(func() { m.alertManager.Stop() })
+	return m
 }
 
 func TestApplyDockerReportGeneratesUniqueIDsForCollidingHosts(t *testing.T) {
diff --git a/internal/monitoring/monitor_memory_test.go b/internal/monitoring/monitor_memory_test.go
index d3a850aa3..c610640a9 100644
--- a/internal/monitoring/monitor_memory_test.go
+++ b/internal/monitoring/monitor_memory_test.go
@@ -191,6 +191,7 @@ func TestPollPVEInstanceUsesRRDMemUsedFallback(t *testing.T) {
 		dlqInsightMap: make(map[string]*dlqInsight),
 		authFailures: make(map[string]int),
 		lastAuthAttempt: make(map[string]time.Time),
+		nodeLastOnline: make(map[string]time.Time),
 	}
 	defer mon.alertManager.Stop()
 	defer mon.notificationMgr.Stop()