mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 03:20:11 +00:00
Respect quiet hours for escalation alerts
Apply quiet-hours suppression to escalation notifications so offline and other suppressed categories do not bypass the normal notification rules during escalation. Fixes #1398.
This commit is contained in:
parent
be9eaa7168
commit
005f64182f
4 changed files with 125 additions and 0 deletions
|
|
@ -2283,6 +2283,28 @@ func (m *Manager) ShouldSuppressResolvedNotification(alert *Alert) bool {
|
|||
return suppressed
|
||||
}
|
||||
|
||||
// ShouldSuppressNotification checks if an alert notification should be suppressed
|
||||
// during quiet hours. Used for paths that bypass dispatchAlert, such as escalation.
|
||||
func (m *Manager) ShouldSuppressNotification(alert *Alert) bool {
|
||||
if alert == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
suppressed, reason := m.shouldSuppressNotification(alert)
|
||||
if suppressed {
|
||||
log.Debug().
|
||||
Str("alertID", alert.ID).
|
||||
Str("type", alert.Type).
|
||||
Str("level", string(alert.Level)).
|
||||
Str("quietHoursRule", reason).
|
||||
Msg("Notification suppressed during quiet hours")
|
||||
}
|
||||
return suppressed
|
||||
}
|
||||
|
||||
// shouldNotifyAfterCooldown checks if enough time has passed since the last notification
|
||||
// Returns true if notification should be sent, false if still in cooldown period
|
||||
func (m *Manager) shouldNotifyAfterCooldown(alert *Alert) bool {
|
||||
|
|
|
|||
|
|
@ -73,6 +73,31 @@ func TestShouldSuppressNotificationQuietHours(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
func TestShouldSuppressNotificationPublic(t *testing.T) {
|
||||
t.Run("nil alert returns false", func(t *testing.T) {
|
||||
m := newManagerWithQuietHoursSuppress(QuietHoursSuppression{Offline: true})
|
||||
if m.ShouldSuppressNotification(nil) {
|
||||
t.Fatal("expected false for nil alert")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("suppresses offline alert during quiet hours", func(t *testing.T) {
|
||||
m := newManagerWithQuietHoursSuppress(QuietHoursSuppression{Offline: true})
|
||||
alert := &Alert{ID: "offline-esc", Type: "connectivity", Level: AlertLevelCritical}
|
||||
if !m.ShouldSuppressNotification(alert) {
|
||||
t.Fatal("expected offline alert to be suppressed during quiet hours")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("does not suppress when offline suppression disabled", func(t *testing.T) {
|
||||
m := newManagerWithQuietHoursSuppress(QuietHoursSuppression{Offline: false})
|
||||
alert := &Alert{ID: "offline-esc2", Type: "connectivity", Level: AlertLevelCritical}
|
||||
if m.ShouldSuppressNotification(alert) {
|
||||
t.Fatal("expected offline alert not to be suppressed when offline suppression is off")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestIsInQuietHours(t *testing.T) {
|
||||
// t.Parallel()
|
||||
|
||||
|
|
|
|||
|
|
@ -5418,6 +5418,10 @@ func (m *Monitor) Start(ctx context.Context, wsHub *websocket.Hub) {
|
|||
|
||||
escalationLevel := config.Schedule.Escalation.Levels[level-1]
|
||||
|
||||
if m.alertManager.ShouldSuppressNotification(alert) {
|
||||
return
|
||||
}
|
||||
|
||||
// Send notifications only to the channels specified in the escalation level
|
||||
m.notificationMgr.SendAlertToChannels(alert, escalationLevel.Notify)
|
||||
|
||||
|
|
|
|||
|
|
@ -230,3 +230,77 @@ func TestHandleAlertResolved_SendsRecoveryOutsideQuietHours(t *testing.T) {
|
|||
t.Fatalf("timed out waiting for resolved notification webhook")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEscalationCallback_QuietHoursSuppression(t *testing.T) {
|
||||
received := make(chan []byte, 1)
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
defer r.Body.Close()
|
||||
body, _ := io.ReadAll(r.Body)
|
||||
select {
|
||||
case received <- body:
|
||||
default:
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
notifMgr := notifications.NewNotificationManagerWithDataDir("http://pulse.example", t.TempDir())
|
||||
if err := notifMgr.UpdateAllowedPrivateCIDRs("127.0.0.1/32,::1/128"); err != nil {
|
||||
t.Fatalf("UpdateAllowedPrivateCIDRs: %v", err)
|
||||
}
|
||||
notifMgr.AddWebhook(notifications.WebhookConfig{
|
||||
ID: "esc-webhook",
|
||||
Name: "esc-webhook",
|
||||
URL: srv.URL,
|
||||
Enabled: true,
|
||||
Service: "generic",
|
||||
})
|
||||
|
||||
alertMgr := alerts.NewManager()
|
||||
cfg := alertMgr.GetConfig()
|
||||
cfg.Enabled = true
|
||||
cfg.Schedule.QuietHours.Enabled = true
|
||||
cfg.Schedule.QuietHours.Timezone = "UTC"
|
||||
cfg.Schedule.QuietHours.Days = map[string]bool{
|
||||
"monday": true,
|
||||
"tuesday": true,
|
||||
"wednesday": true,
|
||||
"thursday": true,
|
||||
"friday": true,
|
||||
"saturday": true,
|
||||
"sunday": true,
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
cfg.Schedule.QuietHours.Start = now.Add(-1 * time.Hour).Format("15:04")
|
||||
cfg.Schedule.QuietHours.End = now.Add(1 * time.Hour).Format("15:04")
|
||||
cfg.Schedule.QuietHours.Suppress.Offline = true
|
||||
cfg.Schedule.Escalation.Enabled = true
|
||||
cfg.Schedule.Escalation.Levels = []alerts.EscalationLevel{
|
||||
{After: 1, Notify: "webhook"},
|
||||
}
|
||||
alertMgr.UpdateConfig(cfg)
|
||||
|
||||
alert := &alerts.Alert{
|
||||
ID: "esc-offline",
|
||||
Type: "connectivity",
|
||||
Level: alerts.AlertLevelCritical,
|
||||
}
|
||||
if !alertMgr.ShouldSuppressNotification(alert) {
|
||||
t.Skip("quiet hours not active; cannot verify escalation suppression")
|
||||
}
|
||||
|
||||
escalationLevel := alertMgr.GetConfig().Schedule.Escalation.Levels[0]
|
||||
if !alertMgr.ShouldSuppressNotification(alert) {
|
||||
notifMgr.SendAlertToChannels(alert, escalationLevel.Notify)
|
||||
}
|
||||
|
||||
select {
|
||||
case body := <-received:
|
||||
var payload map[string]interface{}
|
||||
if err := json.Unmarshal(body, &payload); err != nil {
|
||||
t.Fatalf("failed to parse unexpected webhook payload: %v", err)
|
||||
}
|
||||
t.Fatalf("expected escalation notification to be suppressed during quiet hours, got payload %#v", payload)
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue