Respect quiet hours for escalation alerts

Apply quiet-hours suppression to escalation notifications so offline and other suppressed categories do not bypass the normal notification rules during escalation.

Fixes #1398.
This commit is contained in:
rcourtman 2026-04-12 21:28:57 +01:00
parent be9eaa7168
commit 005f64182f
4 changed files with 125 additions and 0 deletions

View file

@ -2283,6 +2283,28 @@ func (m *Manager) ShouldSuppressResolvedNotification(alert *Alert) bool {
return suppressed
}
// ShouldSuppressNotification checks if an alert notification should be suppressed
// during quiet hours. Used for paths that bypass dispatchAlert, such as escalation.
func (m *Manager) ShouldSuppressNotification(alert *Alert) bool {
if alert == nil {
return false
}
m.mu.RLock()
defer m.mu.RUnlock()
suppressed, reason := m.shouldSuppressNotification(alert)
if suppressed {
log.Debug().
Str("alertID", alert.ID).
Str("type", alert.Type).
Str("level", string(alert.Level)).
Str("quietHoursRule", reason).
Msg("Notification suppressed during quiet hours")
}
return suppressed
}
// shouldNotifyAfterCooldown checks if enough time has passed since the last notification
// Returns true if notification should be sent, false if still in cooldown period
func (m *Manager) shouldNotifyAfterCooldown(alert *Alert) bool {

View file

@ -73,6 +73,31 @@ func TestShouldSuppressNotificationQuietHours(t *testing.T) {
})
}
func TestShouldSuppressNotificationPublic(t *testing.T) {
t.Run("nil alert returns false", func(t *testing.T) {
m := newManagerWithQuietHoursSuppress(QuietHoursSuppression{Offline: true})
if m.ShouldSuppressNotification(nil) {
t.Fatal("expected false for nil alert")
}
})
t.Run("suppresses offline alert during quiet hours", func(t *testing.T) {
m := newManagerWithQuietHoursSuppress(QuietHoursSuppression{Offline: true})
alert := &Alert{ID: "offline-esc", Type: "connectivity", Level: AlertLevelCritical}
if !m.ShouldSuppressNotification(alert) {
t.Fatal("expected offline alert to be suppressed during quiet hours")
}
})
t.Run("does not suppress when offline suppression disabled", func(t *testing.T) {
m := newManagerWithQuietHoursSuppress(QuietHoursSuppression{Offline: false})
alert := &Alert{ID: "offline-esc2", Type: "connectivity", Level: AlertLevelCritical}
if m.ShouldSuppressNotification(alert) {
t.Fatal("expected offline alert not to be suppressed when offline suppression is off")
}
})
}
func TestIsInQuietHours(t *testing.T) {
// t.Parallel()

View file

@ -5418,6 +5418,10 @@ func (m *Monitor) Start(ctx context.Context, wsHub *websocket.Hub) {
escalationLevel := config.Schedule.Escalation.Levels[level-1]
if m.alertManager.ShouldSuppressNotification(alert) {
return
}
// Send notifications only to the channels specified in the escalation level
m.notificationMgr.SendAlertToChannels(alert, escalationLevel.Notify)

View file

@ -230,3 +230,77 @@ func TestHandleAlertResolved_SendsRecoveryOutsideQuietHours(t *testing.T) {
t.Fatalf("timed out waiting for resolved notification webhook")
}
}
func TestEscalationCallback_QuietHoursSuppression(t *testing.T) {
received := make(chan []byte, 1)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
defer r.Body.Close()
body, _ := io.ReadAll(r.Body)
select {
case received <- body:
default:
}
w.WriteHeader(http.StatusOK)
}))
t.Cleanup(srv.Close)
notifMgr := notifications.NewNotificationManagerWithDataDir("http://pulse.example", t.TempDir())
if err := notifMgr.UpdateAllowedPrivateCIDRs("127.0.0.1/32,::1/128"); err != nil {
t.Fatalf("UpdateAllowedPrivateCIDRs: %v", err)
}
notifMgr.AddWebhook(notifications.WebhookConfig{
ID: "esc-webhook",
Name: "esc-webhook",
URL: srv.URL,
Enabled: true,
Service: "generic",
})
alertMgr := alerts.NewManager()
cfg := alertMgr.GetConfig()
cfg.Enabled = true
cfg.Schedule.QuietHours.Enabled = true
cfg.Schedule.QuietHours.Timezone = "UTC"
cfg.Schedule.QuietHours.Days = map[string]bool{
"monday": true,
"tuesday": true,
"wednesday": true,
"thursday": true,
"friday": true,
"saturday": true,
"sunday": true,
}
now := time.Now().UTC()
cfg.Schedule.QuietHours.Start = now.Add(-1 * time.Hour).Format("15:04")
cfg.Schedule.QuietHours.End = now.Add(1 * time.Hour).Format("15:04")
cfg.Schedule.QuietHours.Suppress.Offline = true
cfg.Schedule.Escalation.Enabled = true
cfg.Schedule.Escalation.Levels = []alerts.EscalationLevel{
{After: 1, Notify: "webhook"},
}
alertMgr.UpdateConfig(cfg)
alert := &alerts.Alert{
ID: "esc-offline",
Type: "connectivity",
Level: alerts.AlertLevelCritical,
}
if !alertMgr.ShouldSuppressNotification(alert) {
t.Skip("quiet hours not active; cannot verify escalation suppression")
}
escalationLevel := alertMgr.GetConfig().Schedule.Escalation.Levels[0]
if !alertMgr.ShouldSuppressNotification(alert) {
notifMgr.SendAlertToChannels(alert, escalationLevel.Notify)
}
select {
case body := <-received:
var payload map[string]interface{}
if err := json.Unmarshal(body, &payload); err != nil {
t.Fatalf("failed to parse unexpected webhook payload: %v", err)
}
t.Fatalf("expected escalation notification to be suppressed during quiet hours, got payload %#v", payload)
case <-time.After(500 * time.Millisecond):
}
}