mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 03:20:11 +00:00
Apply quiet-hours suppression to escalation notifications so offline and other suppressed categories do not bypass the normal notification rules during escalation. Fixes #1398.
306 lines
8.9 KiB
Go
306 lines
8.9 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"encoding/json"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/models"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/notifications"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/websocket"
|
|
)
|
|
|
|
func TestMonitor_HandleAlertFired_Extra(t *testing.T) {
|
|
// 1. Alert is nil
|
|
m1 := &Monitor{}
|
|
m1.handleAlertFired(nil) // Should return safely
|
|
|
|
// 2. Alert is not nil, with Hub and NotificationMgr
|
|
hub := websocket.NewHub(nil)
|
|
notifMgr := notifications.NewNotificationManager("dummy")
|
|
|
|
// mock incidentStore - but it is an interface or struct?
|
|
// In monitor.go: func (m *Monitor) GetIncidentStore() *incidents.Store
|
|
// It's a pointer to struct, so hard to mock unless we set it to nil or real store.
|
|
// We can set it to nil for this test to avoid disk I/O.
|
|
|
|
m2 := &Monitor{
|
|
wsHub: hub,
|
|
notificationMgr: notifMgr,
|
|
incidentStore: nil,
|
|
}
|
|
|
|
alert := &alerts.Alert{
|
|
ID: "test-alert",
|
|
Level: alerts.AlertLevelWarning,
|
|
}
|
|
|
|
m2.handleAlertFired(alert)
|
|
// We are just verifying it doesn't crash and calls methods.
|
|
// Hub doesn't expose way to check broadcasts easily without client.
|
|
// NotificationMgr might spin up goroutine.
|
|
}
|
|
|
|
func TestMonitor_HandleAlertResolved_Detailed_Extra(t *testing.T) {
|
|
// 1. With Hub and NotificationMgr and Resolve Notify ON
|
|
hub := websocket.NewHub(nil)
|
|
notifMgr := notifications.NewNotificationManager("dummy")
|
|
|
|
// Enable resolve notifications
|
|
// Notifications config needs to be updated?
|
|
// notificationMgr.GetNotifyOnResolve() reads config.
|
|
// But NotificationManager struct doesn't export Config update easily without SetConfig?
|
|
// The constructor initializes defaults.
|
|
|
|
m := &Monitor{
|
|
wsHub: hub,
|
|
notificationMgr: notifMgr,
|
|
alertManager: alerts.NewManager(),
|
|
}
|
|
|
|
// This should run safely
|
|
m.handleAlertResolved("alert-id")
|
|
}
|
|
|
|
func TestHandleAlertResolved_QuietHoursSuppressesRecovery(t *testing.T) {
|
|
received := make(chan []byte, 1)
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
defer r.Body.Close()
|
|
body, _ := io.ReadAll(r.Body)
|
|
select {
|
|
case received <- body:
|
|
default:
|
|
}
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
t.Cleanup(srv.Close)
|
|
|
|
notifMgr := notifications.NewNotificationManagerWithDataDir("http://pulse.example", t.TempDir())
|
|
if err := notifMgr.UpdateAllowedPrivateCIDRs("127.0.0.1/32,::1/128"); err != nil {
|
|
t.Fatalf("UpdateAllowedPrivateCIDRs: %v", err)
|
|
}
|
|
notifMgr.AddWebhook(notifications.WebhookConfig{
|
|
ID: "test-webhook",
|
|
Name: "test-webhook",
|
|
URL: srv.URL,
|
|
Enabled: true,
|
|
Service: "generic",
|
|
})
|
|
notifMgr.SetNotifyOnResolve(true)
|
|
|
|
alertMgr := alerts.NewManager()
|
|
cfg := alertMgr.GetConfig()
|
|
cfg.Enabled = true
|
|
cfg.GuestDefaults.PoweredOffSeverity = alerts.AlertLevelWarning
|
|
cfg.Schedule.QuietHours.Enabled = true
|
|
cfg.Schedule.QuietHours.Timezone = "UTC"
|
|
cfg.Schedule.QuietHours.Days = map[string]bool{
|
|
"monday": true,
|
|
"tuesday": true,
|
|
"wednesday": true,
|
|
"thursday": true,
|
|
"friday": true,
|
|
"saturday": true,
|
|
"sunday": true,
|
|
}
|
|
now := time.Now().UTC()
|
|
cfg.Schedule.QuietHours.Start = now.Add(-1 * time.Hour).Format("15:04")
|
|
cfg.Schedule.QuietHours.End = now.Add(1 * time.Hour).Format("15:04")
|
|
alertMgr.UpdateConfig(cfg)
|
|
|
|
m := &Monitor{
|
|
alertManager: alertMgr,
|
|
notificationMgr: notifMgr,
|
|
}
|
|
alertMgr.SetResolvedCallback(m.handleAlertResolved)
|
|
|
|
vm := models.VM{
|
|
ID: "vm-1",
|
|
Name: "test-vm",
|
|
Node: "node-1",
|
|
Instance: "inst-1",
|
|
Status: "stopped",
|
|
Memory: models.Memory{Usage: 0},
|
|
Disk: models.Disk{Usage: 0},
|
|
}
|
|
|
|
// Two consecutive stopped polls are required to trigger the powered-off alert.
|
|
alertMgr.CheckGuest(vm, vm.Instance)
|
|
alertMgr.CheckGuest(vm, vm.Instance)
|
|
|
|
alertID := "guest-powered-off-" + vm.ID
|
|
|
|
// Recover while quiet hours are active.
|
|
vm.Status = "running"
|
|
alertMgr.CheckGuest(vm, vm.Instance)
|
|
|
|
resolved := alertMgr.GetResolvedAlert(alertID)
|
|
if resolved == nil || resolved.Alert == nil {
|
|
t.Fatalf("expected resolved alert %q to exist", alertID)
|
|
}
|
|
if !alertMgr.ShouldSuppressResolvedNotification(resolved.Alert) {
|
|
t.Fatalf("expected quiet hours suppression to be active for non-critical resolved alert %q (test precondition)", alertID)
|
|
}
|
|
|
|
select {
|
|
case body := <-received:
|
|
var payload map[string]interface{}
|
|
if err := json.Unmarshal(body, &payload); err != nil {
|
|
t.Fatalf("failed to parse unexpected webhook payload: %v", err)
|
|
}
|
|
t.Fatalf("expected resolved notification to be suppressed during quiet hours, got payload %#v", payload)
|
|
case <-time.After(500 * time.Millisecond):
|
|
}
|
|
}
|
|
|
|
func TestHandleAlertResolved_SendsRecoveryOutsideQuietHours(t *testing.T) {
|
|
received := make(chan []byte, 1)
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
defer r.Body.Close()
|
|
body, _ := io.ReadAll(r.Body)
|
|
select {
|
|
case received <- body:
|
|
default:
|
|
}
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
t.Cleanup(srv.Close)
|
|
|
|
notifMgr := notifications.NewNotificationManagerWithDataDir("http://pulse.example", t.TempDir())
|
|
if err := notifMgr.UpdateAllowedPrivateCIDRs("127.0.0.1/32,::1/128"); err != nil {
|
|
t.Fatalf("UpdateAllowedPrivateCIDRs: %v", err)
|
|
}
|
|
notifMgr.AddWebhook(notifications.WebhookConfig{
|
|
ID: "test-webhook",
|
|
Name: "test-webhook",
|
|
URL: srv.URL,
|
|
Enabled: true,
|
|
Service: "generic",
|
|
})
|
|
notifMgr.SetNotifyOnResolve(true)
|
|
|
|
alertMgr := alerts.NewManager()
|
|
cfg := alertMgr.GetConfig()
|
|
cfg.Enabled = true
|
|
cfg.GuestDefaults.PoweredOffSeverity = alerts.AlertLevelWarning
|
|
cfg.Schedule.QuietHours.Enabled = false
|
|
alertMgr.UpdateConfig(cfg)
|
|
|
|
m := &Monitor{
|
|
alertManager: alertMgr,
|
|
notificationMgr: notifMgr,
|
|
}
|
|
alertMgr.SetResolvedCallback(m.handleAlertResolved)
|
|
|
|
vm := models.VM{
|
|
ID: "vm-2",
|
|
Name: "test-vm-2",
|
|
Node: "node-1",
|
|
Instance: "inst-1",
|
|
Status: "stopped",
|
|
Memory: models.Memory{Usage: 0},
|
|
Disk: models.Disk{Usage: 0},
|
|
}
|
|
|
|
alertMgr.CheckGuest(vm, vm.Instance)
|
|
alertMgr.CheckGuest(vm, vm.Instance)
|
|
|
|
alertID := "guest-powered-off-" + vm.ID
|
|
|
|
vm.Status = "running"
|
|
alertMgr.CheckGuest(vm, vm.Instance)
|
|
|
|
select {
|
|
case body := <-received:
|
|
var payload map[string]interface{}
|
|
if err := json.Unmarshal(body, &payload); err != nil {
|
|
t.Fatalf("failed to parse webhook payload: %v", err)
|
|
}
|
|
if payload["event"] != "resolved" {
|
|
t.Fatalf("expected webhook event=resolved, got %v", payload["event"])
|
|
}
|
|
if payload["alertId"] != alertID {
|
|
t.Fatalf("expected webhook alertId=%q, got %v", alertID, payload["alertId"])
|
|
}
|
|
case <-time.After(5 * time.Second):
|
|
t.Fatalf("timed out waiting for resolved notification webhook")
|
|
}
|
|
}
|
|
|
|
func TestEscalationCallback_QuietHoursSuppression(t *testing.T) {
|
|
received := make(chan []byte, 1)
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
defer r.Body.Close()
|
|
body, _ := io.ReadAll(r.Body)
|
|
select {
|
|
case received <- body:
|
|
default:
|
|
}
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
t.Cleanup(srv.Close)
|
|
|
|
notifMgr := notifications.NewNotificationManagerWithDataDir("http://pulse.example", t.TempDir())
|
|
if err := notifMgr.UpdateAllowedPrivateCIDRs("127.0.0.1/32,::1/128"); err != nil {
|
|
t.Fatalf("UpdateAllowedPrivateCIDRs: %v", err)
|
|
}
|
|
notifMgr.AddWebhook(notifications.WebhookConfig{
|
|
ID: "esc-webhook",
|
|
Name: "esc-webhook",
|
|
URL: srv.URL,
|
|
Enabled: true,
|
|
Service: "generic",
|
|
})
|
|
|
|
alertMgr := alerts.NewManager()
|
|
cfg := alertMgr.GetConfig()
|
|
cfg.Enabled = true
|
|
cfg.Schedule.QuietHours.Enabled = true
|
|
cfg.Schedule.QuietHours.Timezone = "UTC"
|
|
cfg.Schedule.QuietHours.Days = map[string]bool{
|
|
"monday": true,
|
|
"tuesday": true,
|
|
"wednesday": true,
|
|
"thursday": true,
|
|
"friday": true,
|
|
"saturday": true,
|
|
"sunday": true,
|
|
}
|
|
now := time.Now().UTC()
|
|
cfg.Schedule.QuietHours.Start = now.Add(-1 * time.Hour).Format("15:04")
|
|
cfg.Schedule.QuietHours.End = now.Add(1 * time.Hour).Format("15:04")
|
|
cfg.Schedule.QuietHours.Suppress.Offline = true
|
|
cfg.Schedule.Escalation.Enabled = true
|
|
cfg.Schedule.Escalation.Levels = []alerts.EscalationLevel{
|
|
{After: 1, Notify: "webhook"},
|
|
}
|
|
alertMgr.UpdateConfig(cfg)
|
|
|
|
alert := &alerts.Alert{
|
|
ID: "esc-offline",
|
|
Type: "connectivity",
|
|
Level: alerts.AlertLevelCritical,
|
|
}
|
|
if !alertMgr.ShouldSuppressNotification(alert) {
|
|
t.Skip("quiet hours not active; cannot verify escalation suppression")
|
|
}
|
|
|
|
escalationLevel := alertMgr.GetConfig().Schedule.Escalation.Levels[0]
|
|
if !alertMgr.ShouldSuppressNotification(alert) {
|
|
notifMgr.SendAlertToChannels(alert, escalationLevel.Notify)
|
|
}
|
|
|
|
select {
|
|
case body := <-received:
|
|
var payload map[string]interface{}
|
|
if err := json.Unmarshal(body, &payload); err != nil {
|
|
t.Fatalf("failed to parse unexpected webhook payload: %v", err)
|
|
}
|
|
t.Fatalf("expected escalation notification to be suppressed during quiet hours, got payload %#v", payload)
|
|
case <-time.After(500 * time.Millisecond):
|
|
}
|
|
}
|