diff --git a/internal/api/alerts.go b/internal/api/alerts.go index 55606c964..cc33613ca 100644 --- a/internal/api/alerts.go +++ b/internal/api/alerts.go @@ -3,6 +3,7 @@ package api import ( "context" "encoding/json" + "io" "net/http" "net/url" "strconv" @@ -142,12 +143,32 @@ func (h *AlertHandlers) UpdateAlertConfig(w http.ResponseWriter, r *http.Request // Limit request body to 64KB to prevent memory exhaustion r.Body = http.MaxBytesReader(w, r.Body, 64*1024) - var config alerts.AlertConfig - if err := json.NewDecoder(r.Body).Decode(&config); err != nil { + body, err := io.ReadAll(r.Body) + if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } + var config alerts.AlertConfig + if err := json.Unmarshal(body, &config); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + // Preserve notifyOnResolve when the client omits it from the request body. + // Go decodes a missing bool as false, which would silently disable recovery + // notifications. Use a *bool probe to distinguish "missing" from "explicitly false". + // Refs: #1259, #1332 + var scheduleProbe struct { + Schedule struct { + NotifyOnResolve *bool `json:"notifyOnResolve"` + } `json:"schedule"` + } + if err := json.Unmarshal(body, &scheduleProbe); err == nil && scheduleProbe.Schedule.NotifyOnResolve == nil { + currentConfig := h.getMonitor(r.Context()).GetAlertManager().GetConfig() + config.Schedule.NotifyOnResolve = currentConfig.Schedule.NotifyOnResolve + } + // Migrate deprecated GroupingWindow to Grouping.Window if needed before applying. groupWindow := config.Schedule.Grouping.Window if groupWindow == 0 && config.Schedule.GroupingWindow != 0 { diff --git a/internal/notifications/notifications.go b/internal/notifications/notifications.go index 01bf5bf8e..c749e91da 100644 --- a/internal/notifications/notifications.go +++ b/internal/notifications/notifications.go @@ -871,11 +871,30 @@ func (n *NotificationManager) SendResolvedAlert(resolved *alerts.ResolvedAlert) } } -// CancelAlert removes pending notifications for a resolved alert +// CancelAlert removes pending notifications for a resolved alert and cleans +// up cooldown and queue state. Always cleans up cooldown and queued firing +// notifications even when the alert has already left the pending buffer +// (was already grouped and flushed to the queue). Refs: #1332 func (n *NotificationManager) CancelAlert(alertID string) { n.mu.Lock() defer n.mu.Unlock() + // Always clean up cooldown record for resolved alert, even if it + // already left the pending buffer (i.e. was already sent/grouped). + // Without this, a stale cooldown entry can suppress the next firing. + delete(n.lastNotified, alertID) + + // Always cancel queued firing notifications for this alert. If the + // alert was flushed from the pending buffer to the persistent queue + // but hasn't been sent yet, we must cancel it to avoid delivering a + // stale "firing" notification after the alert has already resolved. + if n.queue != nil { + if err := n.queue.CancelByAlertIDs([]string{alertID}); err != nil { + log.Error().Err(err).Str("alertID", alertID).Msg("Failed to cancel queued notifications") + } + } + + // Remove from the in-memory pending buffer if still there if len(n.pendingAlerts) == 0 { return } @@ -910,16 +929,6 @@ func (n *NotificationManager) CancelAlert(alertID string) { n.groupTimer = nil } - // Clean up cooldown record for resolved alert - delete(n.lastNotified, alertID) - - // Cancel any queued notifications containing this alert - if n.queue != nil { - if err := n.queue.CancelByAlertIDs([]string{alertID}); err != nil { - log.Error().Err(err).Str("alertID", alertID).Msg("Failed to cancel queued notifications") - } - } - log.Debug(). Str("alertID", alertID). Int("remaining", len(n.pendingAlerts)).