mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-22 03:02:35 +00:00
Fix recovery notifications silently disabled by config PUT (#1332)
Two fixes for missing recovery/resolved notifications: 1. API config PUT handler now preserves notifyOnResolve when the client omits it from the request body. Go decodes a missing bool as false, which silently disabled recovery notifications on older clients. 2. CancelAlert now always cleans up the cooldown record even when the alert has already left the pending buffer, preventing stale cooldown entries from suppressing future alert cycles.
This commit is contained in:
parent
572520ebc6
commit
9b531c547d
2 changed files with 43 additions and 13 deletions
|
|
@ -3,6 +3,7 @@ package api
|
|||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
|
|
@ -142,12 +143,32 @@ func (h *AlertHandlers) UpdateAlertConfig(w http.ResponseWriter, r *http.Request
|
|||
// Limit request body to 64KB to prevent memory exhaustion
|
||||
r.Body = http.MaxBytesReader(w, r.Body, 64*1024)
|
||||
|
||||
var config alerts.AlertConfig
|
||||
if err := json.NewDecoder(r.Body).Decode(&config); err != nil {
|
||||
body, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
var config alerts.AlertConfig
|
||||
if err := json.Unmarshal(body, &config); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Preserve notifyOnResolve when the client omits it from the request body.
|
||||
// Go decodes a missing bool as false, which would silently disable recovery
|
||||
// notifications. Use a *bool probe to distinguish "missing" from "explicitly false".
|
||||
// Refs: #1259, #1332
|
||||
var scheduleProbe struct {
|
||||
Schedule struct {
|
||||
NotifyOnResolve *bool `json:"notifyOnResolve"`
|
||||
} `json:"schedule"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &scheduleProbe); err == nil && scheduleProbe.Schedule.NotifyOnResolve == nil {
|
||||
currentConfig := h.getMonitor(r.Context()).GetAlertManager().GetConfig()
|
||||
config.Schedule.NotifyOnResolve = currentConfig.Schedule.NotifyOnResolve
|
||||
}
|
||||
|
||||
// Migrate deprecated GroupingWindow to Grouping.Window if needed before applying.
|
||||
groupWindow := config.Schedule.Grouping.Window
|
||||
if groupWindow == 0 && config.Schedule.GroupingWindow != 0 {
|
||||
|
|
|
|||
|
|
@ -871,11 +871,30 @@ func (n *NotificationManager) SendResolvedAlert(resolved *alerts.ResolvedAlert)
|
|||
}
|
||||
}
|
||||
|
||||
// CancelAlert removes pending notifications for a resolved alert
|
||||
// CancelAlert removes pending notifications for a resolved alert and cleans
|
||||
// up cooldown and queue state. Always cleans up cooldown and queued firing
|
||||
// notifications even when the alert has already left the pending buffer
|
||||
// (was already grouped and flushed to the queue). Refs: #1332
|
||||
func (n *NotificationManager) CancelAlert(alertID string) {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
// Always clean up cooldown record for resolved alert, even if it
|
||||
// already left the pending buffer (i.e. was already sent/grouped).
|
||||
// Without this, a stale cooldown entry can suppress the next firing.
|
||||
delete(n.lastNotified, alertID)
|
||||
|
||||
// Always cancel queued firing notifications for this alert. If the
|
||||
// alert was flushed from the pending buffer to the persistent queue
|
||||
// but hasn't been sent yet, we must cancel it to avoid delivering a
|
||||
// stale "firing" notification after the alert has already resolved.
|
||||
if n.queue != nil {
|
||||
if err := n.queue.CancelByAlertIDs([]string{alertID}); err != nil {
|
||||
log.Error().Err(err).Str("alertID", alertID).Msg("Failed to cancel queued notifications")
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from the in-memory pending buffer if still there
|
||||
if len(n.pendingAlerts) == 0 {
|
||||
return
|
||||
}
|
||||
|
|
@ -910,16 +929,6 @@ func (n *NotificationManager) CancelAlert(alertID string) {
|
|||
n.groupTimer = nil
|
||||
}
|
||||
|
||||
// Clean up cooldown record for resolved alert
|
||||
delete(n.lastNotified, alertID)
|
||||
|
||||
// Cancel any queued notifications containing this alert
|
||||
if n.queue != nil {
|
||||
if err := n.queue.CancelByAlertIDs([]string{alertID}); err != nil {
|
||||
log.Error().Err(err).Str("alertID", alertID).Msg("Failed to cancel queued notifications")
|
||||
}
|
||||
}
|
||||
|
||||
log.Debug().
|
||||
Str("alertID", alertID).
|
||||
Int("remaining", len(n.pendingAlerts)).
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue