mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 11:30:15 +00:00
3566 lines
107 KiB
Go
3566 lines
107 KiB
Go
package notifications
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/tls"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"html"
|
|
"io"
|
|
"math"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"os/exec"
|
|
"strings"
|
|
"sync"
|
|
"text/template"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// Webhook configuration constants
|
|
const (
|
|
// HTTP client settings
|
|
WebhookTimeout = 30 * time.Second
|
|
WebhookMaxResponseSize = 1 * 1024 * 1024 // 1 MB max response size
|
|
WebhookMaxRedirects = 3 // Maximum number of redirects to follow
|
|
WebhookTestTimeout = 10 * time.Second
|
|
|
|
// Retry settings
|
|
WebhookInitialBackoff = 1 * time.Second
|
|
WebhookMaxBackoff = 30 * time.Second
|
|
WebhookDefaultRetries = 3
|
|
|
|
// History settings
|
|
WebhookHistoryMaxSize = 100
|
|
|
|
// Rate limiting settings
|
|
WebhookRateLimitWindow = 1 * time.Minute // Time window for rate limiting
|
|
WebhookRateLimitMax = 10 // Max requests per window per webhook
|
|
)
|
|
|
|
const (
|
|
queueTypeSuffixResolved = "_resolved"
|
|
queueTypeSuffixEscalation = "_escalation"
|
|
metadataResolvedAt = "resolvedAt"
|
|
)
|
|
|
|
type notificationEvent string
|
|
|
|
const (
|
|
eventAlert notificationEvent = "alert"
|
|
eventResolved notificationEvent = "resolved"
|
|
eventEscalation notificationEvent = "escalation"
|
|
)
|
|
|
|
var ErrNotificationCancelled = errors.New("notification cancelled")
|
|
|
|
// createSecureWebhookClient creates an HTTP client with security controls
|
|
func (n *NotificationManager) createSecureWebhookClient(timeout time.Duration) *http.Client {
|
|
// dedicated transport that pins DNS resolution to prevent rebinding
|
|
transport := &http.Transport{
|
|
Proxy: http.ProxyFromEnvironment,
|
|
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
|
|
// Extract hostname and port
|
|
host, port, err := net.SplitHostPort(addr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Validate IP if it's already an IP
|
|
if ip := net.ParseIP(host); ip != nil {
|
|
if isPrivateIP(ip) && !n.isIPInAllowlist(ip) {
|
|
return nil, fmt.Errorf("blocked private IP: %s", ip)
|
|
}
|
|
// It's an IP, dial directly
|
|
d := net.Dialer{Timeout: 10 * time.Second}
|
|
return d.DialContext(ctx, network, addr)
|
|
}
|
|
|
|
// Resolve hostname
|
|
ips, err := net.LookupIP(host)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Find first permitted IP
|
|
var permittedIP net.IP
|
|
for _, ip := range ips {
|
|
if !isPrivateIP(ip) || n.isIPInAllowlist(ip) {
|
|
permittedIP = ip
|
|
break
|
|
}
|
|
}
|
|
|
|
if permittedIP == nil {
|
|
return nil, fmt.Errorf("hostname %s resolves to blocked private IPs", host)
|
|
}
|
|
|
|
// Log if we filtered some IPs
|
|
if len(ips) > 1 {
|
|
log.Debug().
|
|
Str("host", host).
|
|
Str("selected_ip", permittedIP.String()).
|
|
Msg("DNS resolution pinned for webhook security")
|
|
}
|
|
|
|
// Dial the permitted IP
|
|
d := net.Dialer{Timeout: 10 * time.Second}
|
|
return d.DialContext(ctx, network, net.JoinHostPort(permittedIP.String(), port))
|
|
},
|
|
ForceAttemptHTTP2: true,
|
|
MaxIdleConns: 100,
|
|
IdleConnTimeout: 90 * time.Second,
|
|
TLSHandshakeTimeout: 10 * time.Second,
|
|
ExpectContinueTimeout: 1 * time.Second,
|
|
}
|
|
|
|
return &http.Client{
|
|
Timeout: timeout,
|
|
Transport: transport,
|
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
|
if len(via) >= WebhookMaxRedirects {
|
|
return fmt.Errorf("stopped after %d redirects", WebhookMaxRedirects)
|
|
}
|
|
// Re-validate strictly on redirect
|
|
return n.ValidateWebhookURL(req.URL.String())
|
|
},
|
|
}
|
|
}
|
|
|
|
// TestNodeInfo contains information about nodes for test notifications
|
|
type TestNodeInfo struct {
|
|
NodeName string
|
|
InstanceURL string
|
|
}
|
|
|
|
// WebhookDelivery tracks webhook delivery attempts for debugging
|
|
type WebhookDelivery struct {
|
|
WebhookName string `json:"webhookName"`
|
|
WebhookURL string `json:"webhookUrl"`
|
|
Service string `json:"service"`
|
|
AlertID string `json:"alertId"`
|
|
Timestamp time.Time `json:"timestamp"`
|
|
StatusCode int `json:"statusCode"`
|
|
Success bool `json:"success"`
|
|
ErrorMessage string `json:"errorMessage,omitempty"`
|
|
RetryAttempts int `json:"retryAttempts"`
|
|
PayloadSize int `json:"payloadSize"`
|
|
}
|
|
|
|
// webhookRateLimit tracks rate limiting for webhook deliveries
|
|
type webhookRateLimit struct {
|
|
lastSent time.Time
|
|
sentCount int
|
|
}
|
|
|
|
// NotificationManager handles sending notifications
|
|
type NotificationManager struct {
|
|
mu sync.RWMutex
|
|
emailConfig EmailConfig
|
|
emailManager *EnhancedEmailManager // Shared email manager for rate limiting
|
|
webhooks []WebhookConfig
|
|
appriseConfig AppriseConfig
|
|
enabled bool
|
|
cooldown time.Duration
|
|
notifyOnResolve bool
|
|
lastNotified map[string]notificationRecord
|
|
groupWindow time.Duration
|
|
pendingAlerts []*alerts.Alert
|
|
groupTimer *time.Timer
|
|
groupByNode bool
|
|
publicURL string // Full URL to access Pulse
|
|
groupByGuest bool
|
|
webhookHistory []WebhookDelivery // Keep last 100 webhook deliveries for debugging
|
|
webhookRateLimits map[string]*webhookRateLimit // Track rate limits per webhook URL
|
|
webhookRateMu sync.Mutex // Separate mutex for webhook rate limiting
|
|
appriseExec appriseExecFunc
|
|
queue *NotificationQueue // Persistent notification queue
|
|
webhookClient *http.Client // Shared HTTP client for webhooks
|
|
stopCleanup chan struct{} // Signal to stop cleanup goroutine
|
|
allowedPrivateNets []*net.IPNet // Parsed CIDR ranges allowed for private webhook targets
|
|
allowedPrivateMu sync.RWMutex // Protects allowedPrivateNets
|
|
}
|
|
|
|
type appriseExecFunc func(ctx context.Context, args []string) ([]byte, error)
|
|
|
|
// copyEmailConfig returns a defensive copy of EmailConfig including its slices to avoid data races.
|
|
func copyEmailConfig(cfg EmailConfig) EmailConfig {
|
|
copy := cfg
|
|
if len(cfg.To) > 0 {
|
|
copy.To = append([]string(nil), cfg.To...)
|
|
}
|
|
return copy
|
|
}
|
|
|
|
// copyWebhookConfigs deep-copies webhook configurations to isolate concurrent writers from background senders.
|
|
func copyWebhookConfigs(webhooks []WebhookConfig) []WebhookConfig {
|
|
if len(webhooks) == 0 {
|
|
return nil
|
|
}
|
|
|
|
copies := make([]WebhookConfig, 0, len(webhooks))
|
|
for _, webhook := range webhooks {
|
|
clone := webhook
|
|
if len(webhook.Headers) > 0 {
|
|
headers := make(map[string]string, len(webhook.Headers))
|
|
for k, v := range webhook.Headers {
|
|
headers[k] = v
|
|
}
|
|
clone.Headers = headers
|
|
}
|
|
if len(webhook.CustomFields) > 0 {
|
|
custom := make(map[string]string, len(webhook.CustomFields))
|
|
for k, v := range webhook.CustomFields {
|
|
custom[k] = v
|
|
}
|
|
clone.CustomFields = custom
|
|
}
|
|
copies = append(copies, clone)
|
|
}
|
|
|
|
return copies
|
|
}
|
|
|
|
func copyAppriseConfig(cfg AppriseConfig) AppriseConfig {
|
|
copy := cfg
|
|
if len(cfg.Targets) > 0 {
|
|
copy.Targets = append([]string(nil), cfg.Targets...)
|
|
}
|
|
return copy
|
|
}
|
|
|
|
// annotateResolvedMetadata stores the resolution timestamp on the alert metadata for queue persistence.
|
|
func annotateResolvedMetadata(alert *alerts.Alert, resolvedAt time.Time) {
|
|
if alert == nil {
|
|
return
|
|
}
|
|
if alert.Metadata == nil {
|
|
alert.Metadata = make(map[string]interface{})
|
|
}
|
|
alert.Metadata[metadataResolvedAt] = resolvedAt.Format(time.RFC3339)
|
|
}
|
|
|
|
// NormalizeAppriseConfig cleans and normalizes Apprise configuration values.
|
|
func NormalizeAppriseConfig(cfg AppriseConfig) AppriseConfig {
|
|
normalized := cfg
|
|
|
|
mode := strings.ToLower(strings.TrimSpace(string(normalized.Mode)))
|
|
switch mode {
|
|
case string(AppriseModeHTTP):
|
|
normalized.Mode = AppriseModeHTTP
|
|
default:
|
|
normalized.Mode = AppriseModeCLI
|
|
}
|
|
|
|
normalized.CLIPath = "apprise" // Force default binary for security
|
|
|
|
if normalized.TimeoutSeconds <= 0 {
|
|
normalized.TimeoutSeconds = 15
|
|
} else if normalized.TimeoutSeconds > 120 {
|
|
normalized.TimeoutSeconds = 120
|
|
} else if normalized.TimeoutSeconds < 5 {
|
|
normalized.TimeoutSeconds = 5
|
|
}
|
|
|
|
cleanTargets := make([]string, 0, len(normalized.Targets))
|
|
seen := make(map[string]struct{}, len(normalized.Targets))
|
|
for _, target := range normalized.Targets {
|
|
trimmed := strings.TrimSpace(target)
|
|
if trimmed == "" {
|
|
continue
|
|
}
|
|
lower := strings.ToLower(trimmed)
|
|
if _, exists := seen[lower]; exists {
|
|
continue
|
|
}
|
|
seen[lower] = struct{}{}
|
|
cleanTargets = append(cleanTargets, trimmed)
|
|
}
|
|
normalized.Targets = cleanTargets
|
|
|
|
normalized.ServerURL = strings.TrimSpace(normalized.ServerURL)
|
|
normalized.ServerURL = strings.TrimRight(normalized.ServerURL, "/")
|
|
|
|
normalized.ConfigKey = strings.TrimSpace(normalized.ConfigKey)
|
|
|
|
normalized.APIKey = strings.TrimSpace(normalized.APIKey)
|
|
normalized.APIKeyHeader = strings.TrimSpace(normalized.APIKeyHeader)
|
|
if normalized.APIKeyHeader == "" {
|
|
normalized.APIKeyHeader = "X-API-KEY"
|
|
}
|
|
|
|
switch normalized.Mode {
|
|
case AppriseModeCLI:
|
|
if len(normalized.Targets) == 0 {
|
|
normalized.Enabled = false
|
|
}
|
|
case AppriseModeHTTP:
|
|
if normalized.ServerURL == "" {
|
|
normalized.Enabled = false
|
|
}
|
|
}
|
|
|
|
return normalized
|
|
}
|
|
|
|
func defaultAppriseExec(ctx context.Context, args []string) ([]byte, error) {
|
|
cmd := exec.CommandContext(ctx, "apprise", args...)
|
|
return cmd.CombinedOutput()
|
|
}
|
|
|
|
type notificationRecord struct {
|
|
lastSent time.Time
|
|
alertStart time.Time
|
|
}
|
|
|
|
// Alert represents an alert (interface to avoid circular dependency)
|
|
type Alert interface {
|
|
GetID() string
|
|
GetResourceName() string
|
|
GetType() string
|
|
GetLevel() string
|
|
GetValue() float64
|
|
GetThreshold() float64
|
|
GetMessage() string
|
|
GetNode() string
|
|
GetInstance() string
|
|
GetStartTime() time.Time
|
|
}
|
|
|
|
// EmailConfig holds email notification settings
|
|
type EmailConfig struct {
|
|
Enabled bool `json:"enabled"`
|
|
Provider string `json:"provider"` // Email provider name (Gmail, SendGrid, etc.)
|
|
SMTPHost string `json:"server"` // Changed from smtpHost to server for frontend consistency
|
|
SMTPPort int `json:"port"` // Changed from smtpPort to port for frontend consistency
|
|
Username string `json:"username"`
|
|
Password string `json:"password"`
|
|
From string `json:"from"`
|
|
To []string `json:"to"`
|
|
TLS bool `json:"tls"`
|
|
StartTLS bool `json:"startTLS"` // STARTTLS support
|
|
RateLimit int `json:"rateLimit"` // Max emails per minute (0 = default 60)
|
|
}
|
|
|
|
// WebhookConfig holds webhook settings
|
|
type WebhookConfig struct {
|
|
ID string `json:"id"`
|
|
Name string `json:"name"`
|
|
URL string `json:"url"`
|
|
Method string `json:"method"`
|
|
Headers map[string]string `json:"headers"`
|
|
Enabled bool `json:"enabled"`
|
|
Service string `json:"service"` // discord, slack, teams, etc.
|
|
Template string `json:"template"` // Custom payload template
|
|
CustomFields map[string]string `json:"customFields,omitempty"`
|
|
Mention string `json:"mention,omitempty"` // Platform-specific mention (e.g., @everyone, @channel, <@USER_ID>)
|
|
}
|
|
|
|
// AppriseMode identifies how Pulse should deliver notifications through Apprise.
|
|
type AppriseMode string
|
|
|
|
const (
|
|
AppriseModeCLI AppriseMode = "cli"
|
|
AppriseModeHTTP AppriseMode = "http"
|
|
)
|
|
|
|
// AppriseConfig holds Apprise notification settings.
|
|
type AppriseConfig struct {
|
|
Enabled bool `json:"enabled"`
|
|
Mode AppriseMode `json:"mode,omitempty"`
|
|
Targets []string `json:"targets"`
|
|
CLIPath string `json:"cliPath,omitempty"`
|
|
TimeoutSeconds int `json:"timeoutSeconds,omitempty"`
|
|
ServerURL string `json:"serverUrl,omitempty"`
|
|
ConfigKey string `json:"configKey,omitempty"`
|
|
APIKey string `json:"apiKey,omitempty"`
|
|
APIKeyHeader string `json:"apiKeyHeader,omitempty"`
|
|
SkipTLSVerify bool `json:"skipTlsVerify,omitempty"`
|
|
}
|
|
|
|
// NewNotificationManager creates a new notification manager using the global data directory.
|
|
// For multi-tenant deployments, use NewNotificationManagerWithDataDir instead.
|
|
func NewNotificationManager(publicURL string) *NotificationManager {
|
|
return NewNotificationManagerWithDataDir(publicURL, "")
|
|
}
|
|
|
|
// NewNotificationManagerWithDataDir creates a new notification manager with a custom data directory.
|
|
// This enables tenant-scoped notification queue persistence in multi-tenant deployments.
|
|
// If dataDir is empty, it uses the global data directory.
|
|
func NewNotificationManagerWithDataDir(publicURL string, dataDir string) *NotificationManager {
|
|
cleanURL := strings.TrimRight(strings.TrimSpace(publicURL), "/")
|
|
if cleanURL != "" {
|
|
log.Info().Str("publicURL", cleanURL).Msg("NotificationManager initialized with public URL")
|
|
} else {
|
|
log.Info().Msg("NotificationManager initialized without public URL - webhook links may not work")
|
|
}
|
|
|
|
// Initialize persistent queue with tenant-specific data directory
|
|
queue, err := NewNotificationQueue(dataDir)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to initialize notification queue, notifications will be in-memory only")
|
|
queue = nil
|
|
}
|
|
|
|
nm := &NotificationManager{
|
|
enabled: true,
|
|
cooldown: 5 * time.Minute,
|
|
notifyOnResolve: true,
|
|
lastNotified: make(map[string]notificationRecord),
|
|
webhooks: []WebhookConfig{},
|
|
appriseConfig: AppriseConfig{
|
|
Enabled: false,
|
|
Mode: AppriseModeCLI,
|
|
Targets: []string{},
|
|
CLIPath: "apprise",
|
|
TimeoutSeconds: 15,
|
|
APIKeyHeader: "X-API-KEY",
|
|
},
|
|
groupWindow: 30 * time.Second,
|
|
pendingAlerts: make([]*alerts.Alert, 0),
|
|
groupByNode: true,
|
|
groupByGuest: false,
|
|
webhookHistory: make([]WebhookDelivery, 0, WebhookHistoryMaxSize),
|
|
webhookRateLimits: make(map[string]*webhookRateLimit),
|
|
publicURL: cleanURL,
|
|
appriseExec: defaultAppriseExec,
|
|
queue: queue,
|
|
stopCleanup: make(chan struct{}),
|
|
}
|
|
|
|
// Create webhook client after NotificationManager is initialized
|
|
nm.webhookClient = nm.createSecureWebhookClient(WebhookTimeout)
|
|
|
|
// Wire up queue processor if queue is available
|
|
if queue != nil {
|
|
queue.SetProcessor(nm.ProcessQueuedNotification)
|
|
}
|
|
|
|
// Start periodic cleanup of old lastNotified entries (every 1 hour)
|
|
go nm.cleanupOldNotificationRecords()
|
|
|
|
return nm
|
|
}
|
|
|
|
// SetPublicURL updates the public URL used for webhook payloads.
|
|
func (n *NotificationManager) SetPublicURL(publicURL string) {
|
|
trimmed := strings.TrimRight(strings.TrimSpace(publicURL), "/")
|
|
if trimmed == "" {
|
|
return
|
|
}
|
|
|
|
n.mu.Lock()
|
|
if n.publicURL == trimmed {
|
|
n.mu.Unlock()
|
|
return
|
|
}
|
|
n.publicURL = trimmed
|
|
n.mu.Unlock()
|
|
|
|
log.Info().Str("publicURL", trimmed).Msg("NotificationManager public URL updated")
|
|
}
|
|
|
|
// GetPublicURL returns the configured public URL for notifications.
|
|
func (n *NotificationManager) GetPublicURL() string {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.publicURL
|
|
}
|
|
|
|
// SetEmailConfig updates email configuration
|
|
func (n *NotificationManager) SetEmailConfig(config EmailConfig) {
|
|
n.mu.Lock()
|
|
n.emailConfig = config
|
|
|
|
// Recreate email manager with new config to preserve rate limiting state
|
|
rateLimit := config.RateLimit
|
|
if rateLimit <= 0 {
|
|
rateLimit = 60
|
|
}
|
|
providerConfig := EmailProviderConfig{
|
|
EmailConfig: config,
|
|
Provider: "",
|
|
MaxRetries: 3,
|
|
RetryDelay: 5,
|
|
RateLimit: rateLimit,
|
|
StartTLS: config.StartTLS,
|
|
SkipTLSVerify: false,
|
|
AuthRequired: config.Username != "" && config.Password != "",
|
|
}
|
|
n.emailManager = NewEnhancedEmailManager(providerConfig)
|
|
queue := n.queue
|
|
n.mu.Unlock()
|
|
|
|
if !config.Enabled && queue != nil {
|
|
if err := queue.CancelByTypes([]string{"email", "email_resolved", "email_escalation"}); err != nil {
|
|
log.Error().Err(err).Msg("Failed to cancel queued email notifications after disable")
|
|
}
|
|
}
|
|
}
|
|
|
|
// SetAppriseConfig updates Apprise configuration.
|
|
func (n *NotificationManager) SetAppriseConfig(config AppriseConfig) {
|
|
n.mu.Lock()
|
|
n.appriseConfig = NormalizeAppriseConfig(config)
|
|
queue := n.queue
|
|
enabled := n.appriseConfig.Enabled
|
|
n.mu.Unlock()
|
|
|
|
if !enabled && queue != nil {
|
|
if err := queue.CancelByTypes([]string{"apprise", "apprise_resolved", "apprise_escalation"}); err != nil {
|
|
log.Error().Err(err).Msg("Failed to cancel queued Apprise notifications after disable")
|
|
}
|
|
}
|
|
}
|
|
|
|
// GetAppriseConfig returns a copy of the Apprise configuration.
|
|
func (n *NotificationManager) GetAppriseConfig() AppriseConfig {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return copyAppriseConfig(n.appriseConfig)
|
|
}
|
|
|
|
// SetCooldown updates the cooldown duration
|
|
func (n *NotificationManager) SetCooldown(minutes int) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
if minutes < 0 {
|
|
minutes = 0
|
|
}
|
|
n.cooldown = time.Duration(minutes) * time.Minute
|
|
log.Info().Int("minutes", minutes).Msg("Updated notification cooldown")
|
|
}
|
|
|
|
// SetNotifyOnResolve toggles whether resolved alerts send notifications.
|
|
func (n *NotificationManager) SetNotifyOnResolve(enabled bool) {
|
|
n.mu.Lock()
|
|
was := n.notifyOnResolve
|
|
n.notifyOnResolve = enabled
|
|
n.mu.Unlock()
|
|
|
|
if was != enabled {
|
|
log.Info().Bool("enabled", enabled).Msg("Updated resolved alert notifications")
|
|
}
|
|
}
|
|
|
|
// GetNotifyOnResolve returns whether resolved alerts trigger notifications.
|
|
func (n *NotificationManager) GetNotifyOnResolve() bool {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.notifyOnResolve
|
|
}
|
|
|
|
// SetGroupingWindow updates the grouping window duration
|
|
func (n *NotificationManager) SetGroupingWindow(seconds int) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
if seconds < 0 {
|
|
seconds = 0
|
|
}
|
|
n.groupWindow = time.Duration(seconds) * time.Second
|
|
log.Info().Int("seconds", seconds).Msg("Updated notification grouping window")
|
|
}
|
|
|
|
// SetGroupingOptions updates grouping options
|
|
func (n *NotificationManager) SetGroupingOptions(byNode, byGuest bool) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
n.groupByNode = byNode
|
|
n.groupByGuest = byGuest
|
|
log.Info().Bool("byNode", byNode).Bool("byGuest", byGuest).Msg("Updated notification grouping options")
|
|
}
|
|
|
|
// AddWebhook adds a webhook configuration
|
|
func (n *NotificationManager) AddWebhook(webhook WebhookConfig) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
n.webhooks = append(n.webhooks, webhook)
|
|
}
|
|
|
|
// UpdateWebhook updates an existing webhook
|
|
func (n *NotificationManager) UpdateWebhook(id string, webhook WebhookConfig) error {
|
|
n.mu.Lock()
|
|
queue := n.queue
|
|
for i, w := range n.webhooks {
|
|
if w.ID == id {
|
|
n.webhooks[i] = webhook
|
|
n.mu.Unlock()
|
|
if !webhook.Enabled && queue != nil {
|
|
if err := queue.CancelWebhooksByIDs([]string{id}); err != nil {
|
|
log.Error().Err(err).Str("webhookID", id).Msg("Failed to cancel queued webhook notifications after disable")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
n.mu.Unlock()
|
|
return fmt.Errorf("webhook not found: %s", id)
|
|
}
|
|
|
|
// DeleteWebhook removes a webhook
|
|
func (n *NotificationManager) DeleteWebhook(id string) error {
|
|
n.mu.Lock()
|
|
queue := n.queue
|
|
for i, w := range n.webhooks {
|
|
if w.ID == id {
|
|
n.webhooks = append(n.webhooks[:i], n.webhooks[i+1:]...)
|
|
n.mu.Unlock()
|
|
if queue != nil {
|
|
if err := queue.CancelWebhooksByIDs([]string{id}); err != nil {
|
|
log.Error().Err(err).Str("webhookID", id).Msg("Failed to cancel queued webhook notifications after delete")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
n.mu.Unlock()
|
|
return fmt.Errorf("webhook not found: %s", id)
|
|
}
|
|
|
|
// GetWebhooks returns all webhook configurations
|
|
func (n *NotificationManager) GetWebhooks() []WebhookConfig {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
|
|
if len(n.webhooks) == 0 {
|
|
return []WebhookConfig{}
|
|
}
|
|
|
|
webhooks := make([]WebhookConfig, len(n.webhooks))
|
|
copy(webhooks, n.webhooks)
|
|
return webhooks
|
|
}
|
|
|
|
// GetEmailConfig returns the email configuration
|
|
func (n *NotificationManager) GetEmailConfig() EmailConfig {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.emailConfig
|
|
}
|
|
|
|
// GetQueue returns the notification queue
|
|
func (n *NotificationManager) GetQueue() *NotificationQueue {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.queue
|
|
}
|
|
|
|
// SetEnabled toggles notification delivery globally for this runtime instance.
|
|
func (n *NotificationManager) SetEnabled(enabled bool) {
|
|
var (
|
|
queue *NotificationQueue
|
|
changed bool
|
|
)
|
|
|
|
n.mu.Lock()
|
|
changed = n.enabled != enabled
|
|
n.enabled = enabled
|
|
if !enabled {
|
|
for i := range n.pendingAlerts {
|
|
n.pendingAlerts[i] = nil
|
|
}
|
|
n.pendingAlerts = n.pendingAlerts[:0]
|
|
if n.groupTimer != nil {
|
|
n.groupTimer.Stop()
|
|
n.groupTimer = nil
|
|
}
|
|
queue = n.queue
|
|
}
|
|
n.mu.Unlock()
|
|
|
|
if changed {
|
|
log.Info().Bool("enabled", enabled).Msg("Updated notification manager enabled state")
|
|
}
|
|
|
|
if !enabled && queue != nil {
|
|
if err := queue.CancelByTypes([]string{
|
|
"email", "email_resolved", "email_escalation",
|
|
"webhook", "webhook_resolved", "webhook_escalation",
|
|
"apprise", "apprise_resolved", "apprise_escalation",
|
|
}); err != nil {
|
|
log.Error().Err(err).Msg("Failed to cancel queued notifications after global disable")
|
|
}
|
|
}
|
|
}
|
|
|
|
// IsEnabled reports whether notification delivery is currently enabled.
|
|
func (n *NotificationManager) IsEnabled() bool {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.enabled
|
|
}
|
|
|
|
// SendAlert sends notifications for an alert
|
|
func (n *NotificationManager) SendAlert(alert *alerts.Alert) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
log.Info().
|
|
Str("alertID", alert.ID).
|
|
Bool("enabled", n.enabled).
|
|
Int("webhooks", len(n.webhooks)).
|
|
Bool("emailEnabled", n.emailConfig.Enabled).
|
|
Msg("SendAlert called")
|
|
|
|
if !n.enabled {
|
|
log.Debug().Msg("Notifications disabled, skipping")
|
|
return
|
|
}
|
|
|
|
// Check cooldown
|
|
record, exists := n.lastNotified[alert.ID]
|
|
if exists && record.alertStart.Equal(alert.StartTime) && time.Since(record.lastSent) < n.cooldown {
|
|
log.Info().
|
|
Str("alertID", alert.ID).
|
|
Str("resourceName", alert.ResourceName).
|
|
Str("type", alert.Type).
|
|
Dur("timeSince", time.Since(record.lastSent)).
|
|
Dur("cooldown", n.cooldown).
|
|
Dur("remainingCooldown", n.cooldown-time.Since(record.lastSent)).
|
|
Msg("Alert notification in cooldown for active alert - notification suppressed")
|
|
return
|
|
}
|
|
|
|
log.Info().
|
|
Str("alertID", alert.ID).
|
|
Str("resourceName", alert.ResourceName).
|
|
Str("type", alert.Type).
|
|
Float64("value", alert.Value).
|
|
Float64("threshold", alert.Threshold).
|
|
Bool("inCooldown", exists).
|
|
Msg("Alert passed cooldown check - adding to pending notifications")
|
|
|
|
// Add to pending alerts for grouping
|
|
n.pendingAlerts = append(n.pendingAlerts, alert)
|
|
|
|
// If this is the first alert in the group, start the timer
|
|
if n.groupTimer == nil {
|
|
n.groupTimer = time.AfterFunc(n.groupWindow, func() {
|
|
n.sendGroupedAlerts()
|
|
})
|
|
log.Debug().
|
|
Int("pendingCount", len(n.pendingAlerts)).
|
|
Dur("groupWindow", n.groupWindow).
|
|
Msg("Started alert grouping timer")
|
|
}
|
|
}
|
|
|
|
// SendAlertToChannels sends an alert to specific notification channels only,
|
|
// bypassing both alert grouping and the regular notification cooldown. This is
|
|
// used for escalation notifications where the escalation level specifies which
|
|
// channels to notify — escalation has its own timing/level progression so the
|
|
// regular cooldown should not suppress it.
|
|
// Valid channel values: "email", "webhook", "apprise", "all".
|
|
func (n *NotificationManager) SendAlertToChannels(alert *alerts.Alert, channels ...string) {
|
|
n.mu.Lock()
|
|
|
|
if !n.enabled {
|
|
n.mu.Unlock()
|
|
log.Debug().Msg("Notifications disabled, skipping channel-filtered send")
|
|
return
|
|
}
|
|
|
|
// Build channel filter (normalize to lowercase for robustness)
|
|
channelSet := make(map[string]bool, len(channels))
|
|
for _, ch := range channels {
|
|
channelSet[strings.ToLower(strings.TrimSpace(ch))] = true
|
|
}
|
|
sendAll := channelSet["all"] || len(channels) == 0
|
|
|
|
// Snapshot only the requested channel configs
|
|
var emailConfig EmailConfig
|
|
if sendAll || channelSet["email"] {
|
|
emailConfig = copyEmailConfig(n.emailConfig)
|
|
}
|
|
|
|
var webhooks []WebhookConfig
|
|
if sendAll || channelSet["webhook"] {
|
|
webhooks = copyWebhookConfigs(n.webhooks)
|
|
}
|
|
|
|
var appriseConfig AppriseConfig
|
|
if sendAll || channelSet["apprise"] {
|
|
appriseConfig = copyAppriseConfig(n.appriseConfig)
|
|
}
|
|
|
|
// Check that at least one requested channel is actually enabled
|
|
hasEnabled := (emailConfig.Enabled) ||
|
|
(appriseConfig.Enabled) ||
|
|
func() bool {
|
|
for _, w := range webhooks {
|
|
if w.Enabled {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}()
|
|
|
|
if !hasEnabled {
|
|
n.mu.Unlock()
|
|
log.Info().
|
|
Str("alertID", alert.ID).
|
|
Strs("channels", channels).
|
|
Msg("No enabled channels match escalation target - skipping")
|
|
return
|
|
}
|
|
|
|
n.mu.Unlock()
|
|
|
|
log.Info().
|
|
Str("alertID", alert.ID).
|
|
Strs("channels", channels).
|
|
Msg("Sending escalation notification to filtered channels")
|
|
|
|
alertsToSend := []*alerts.Alert{alert}
|
|
|
|
// Use persistent queue with escalation-typed entries so sends get retried
|
|
// on failure, but the dequeue handler skips cooldown writes (it only marks
|
|
// cooldown for eventAlert, not eventEscalation).
|
|
if n.queue != nil {
|
|
n.enqueueEscalationNotifications(emailConfig, webhooks, appriseConfig, alertsToSend)
|
|
} else {
|
|
n.sendNotificationsDirect(emailConfig, webhooks, appriseConfig, alertsToSend)
|
|
}
|
|
}
|
|
|
|
// SendResolvedAlert delivers notifications for a resolved alert immediately.
|
|
func (n *NotificationManager) SendResolvedAlert(resolved *alerts.ResolvedAlert) {
|
|
if resolved == nil || resolved.Alert == nil {
|
|
return
|
|
}
|
|
|
|
// Clone the alert so downstream goroutines cannot mutate shared state.
|
|
alertCopy := resolved.Alert.Clone()
|
|
if alertCopy == nil {
|
|
return
|
|
}
|
|
|
|
resolvedAt := resolved.ResolvedTime
|
|
if resolvedAt.IsZero() {
|
|
resolvedAt = time.Now()
|
|
}
|
|
annotateResolvedMetadata(alertCopy, resolvedAt)
|
|
|
|
n.mu.RLock()
|
|
enabled := n.enabled && n.notifyOnResolve
|
|
emailConfig := copyEmailConfig(n.emailConfig)
|
|
webhooks := copyWebhookConfigs(n.webhooks)
|
|
appriseConfig := copyAppriseConfig(n.appriseConfig)
|
|
queue := n.queue
|
|
n.mu.RUnlock()
|
|
|
|
if !enabled {
|
|
log.Debug().
|
|
Str("alertID", alertCopy.ID).
|
|
Msg("Resolved notifications disabled, skipping")
|
|
return
|
|
}
|
|
|
|
alertsToSend := []*alerts.Alert{alertCopy}
|
|
|
|
if queue != nil {
|
|
n.enqueueResolvedNotifications(queue, emailConfig, webhooks, appriseConfig, alertsToSend, resolvedAt)
|
|
} else {
|
|
n.sendResolvedNotificationsDirect(emailConfig, webhooks, appriseConfig, alertsToSend, resolvedAt)
|
|
}
|
|
}
|
|
|
|
// CancelAlert removes pending notifications for a resolved alert and cleans
|
|
// up cooldown and queue state. Always cleans up cooldown and queued firing
|
|
// notifications even when the alert has already left the pending buffer
|
|
// (was already grouped and flushed to the queue). Refs: #1332
|
|
func (n *NotificationManager) CancelAlert(alertID string) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
// Always clean up cooldown record for resolved alert, even if it
|
|
// already left the pending buffer (i.e. was already sent/grouped).
|
|
// Without this, a stale cooldown entry can suppress the next firing.
|
|
delete(n.lastNotified, alertID)
|
|
|
|
// Always cancel queued firing notifications for this alert. If the
|
|
// alert was flushed from the pending buffer to the persistent queue
|
|
// but hasn't been sent yet, we must cancel it to avoid delivering a
|
|
// stale "firing" notification after the alert has already resolved.
|
|
if n.queue != nil {
|
|
if err := n.queue.CancelByAlertIDs([]string{alertID}); err != nil {
|
|
log.Error().Err(err).Str("alertID", alertID).Msg("Failed to cancel queued notifications")
|
|
}
|
|
}
|
|
|
|
// Remove from the in-memory pending buffer if still there
|
|
if len(n.pendingAlerts) == 0 {
|
|
return
|
|
}
|
|
|
|
filtered := n.pendingAlerts[:0]
|
|
removed := 0
|
|
for _, pending := range n.pendingAlerts {
|
|
if pending == nil {
|
|
continue
|
|
}
|
|
if pending.ID == alertID {
|
|
removed++
|
|
continue
|
|
}
|
|
filtered = append(filtered, pending)
|
|
}
|
|
|
|
if removed == 0 {
|
|
return
|
|
}
|
|
|
|
for i := len(filtered); i < len(n.pendingAlerts); i++ {
|
|
n.pendingAlerts[i] = nil
|
|
}
|
|
|
|
n.pendingAlerts = filtered
|
|
|
|
if len(n.pendingAlerts) == 0 && n.groupTimer != nil {
|
|
if n.groupTimer.Stop() {
|
|
log.Debug().Str("alertID", alertID).Msg("Stopped grouping timer after alert cancellation")
|
|
}
|
|
n.groupTimer = nil
|
|
}
|
|
|
|
log.Debug().
|
|
Str("alertID", alertID).
|
|
Int("remaining", len(n.pendingAlerts)).
|
|
Msg("Removed resolved alert from pending notifications and cooldown map")
|
|
}
|
|
|
|
// sendGroupedAlerts sends all pending alerts as a group
|
|
func (n *NotificationManager) sendGroupedAlerts() {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
if len(n.pendingAlerts) == 0 {
|
|
return
|
|
}
|
|
|
|
// Copy alerts to send
|
|
alertsToSend := make([]*alerts.Alert, len(n.pendingAlerts))
|
|
copy(alertsToSend, n.pendingAlerts)
|
|
|
|
// Clear pending alerts
|
|
n.pendingAlerts = n.pendingAlerts[:0]
|
|
n.groupTimer = nil
|
|
|
|
log.Info().
|
|
Int("alertCount", len(alertsToSend)).
|
|
Msg("Sending grouped alert notifications")
|
|
|
|
// Snapshot configuration while holding the lock to avoid races with concurrent updates
|
|
emailConfig := copyEmailConfig(n.emailConfig)
|
|
webhooks := copyWebhookConfigs(n.webhooks)
|
|
appriseConfig := copyAppriseConfig(n.appriseConfig)
|
|
|
|
// Use persistent queue if available, otherwise send directly
|
|
if n.queue != nil {
|
|
n.enqueueNotifications(emailConfig, webhooks, appriseConfig, alertsToSend)
|
|
// Note: Cooldown will be marked after successful dequeue and send
|
|
} else {
|
|
n.sendNotificationsDirect(emailConfig, webhooks, appriseConfig, alertsToSend)
|
|
// For direct sends, mark cooldown immediately (fire-and-forget)
|
|
now := time.Now()
|
|
for _, alert := range alertsToSend {
|
|
n.lastNotified[alert.ID] = notificationRecord{
|
|
lastSent: now,
|
|
alertStart: alert.StartTime,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// enqueueNotifications adds notifications to the persistent queue
|
|
// Falls back to direct sending if enqueue fails
|
|
func (n *NotificationManager) enqueueNotifications(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert) {
|
|
anyFailed := false
|
|
|
|
// Enqueue email notification
|
|
if emailConfig.Enabled {
|
|
configJSON, err := json.Marshal(emailConfig)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to marshal email config for queue")
|
|
} else {
|
|
notif := &QueuedNotification{
|
|
Type: "email",
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := n.queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Msg("Failed to enqueue email notification - falling back to direct send")
|
|
anyFailed = true
|
|
go n.sendGroupedEmail(emailConfig, alertsToSend)
|
|
} else {
|
|
log.Debug().Int("alertCount", len(alertsToSend)).Msg("Enqueued email notification")
|
|
}
|
|
}
|
|
}
|
|
|
|
// Enqueue webhook notifications
|
|
for _, webhook := range webhooks {
|
|
if webhook.Enabled {
|
|
configJSON, err := json.Marshal(webhook)
|
|
if err != nil {
|
|
log.Error().Err(err).Str("webhookName", webhook.Name).Msg("Failed to marshal webhook config for queue")
|
|
} else {
|
|
notif := &QueuedNotification{
|
|
Type: "webhook",
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := n.queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Str("webhookName", webhook.Name).Msg("Failed to enqueue webhook notification - falling back to direct send")
|
|
anyFailed = true
|
|
go n.sendGroupedWebhook(webhook, alertsToSend)
|
|
} else {
|
|
log.Debug().Str("webhookName", webhook.Name).Int("alertCount", len(alertsToSend)).Msg("Enqueued webhook notification")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Enqueue apprise notification
|
|
if appriseConfig.Enabled {
|
|
configJSON, err := json.Marshal(appriseConfig)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to marshal apprise config for queue")
|
|
} else {
|
|
notif := &QueuedNotification{
|
|
Type: "apprise",
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := n.queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Msg("Failed to enqueue apprise notification - falling back to direct send")
|
|
anyFailed = true
|
|
go n.sendGroupedApprise(appriseConfig, alertsToSend)
|
|
} else {
|
|
log.Debug().Int("alertCount", len(alertsToSend)).Msg("Enqueued apprise notification")
|
|
}
|
|
}
|
|
}
|
|
|
|
// If any enqueue failed, mark cooldown immediately for fire-and-forget sends
|
|
if anyFailed {
|
|
n.mu.Lock()
|
|
now := time.Now()
|
|
for _, alert := range alertsToSend {
|
|
n.lastNotified[alert.ID] = notificationRecord{
|
|
lastSent: now,
|
|
alertStart: alert.StartTime,
|
|
}
|
|
}
|
|
n.mu.Unlock()
|
|
}
|
|
}
|
|
|
|
// enqueueEscalationNotifications adds escalation notifications to the persistent
|
|
// queue using the "_escalation" type suffix. The dequeue handler only marks
|
|
// cooldown for eventAlert, so escalation sends won't interfere with the alert
|
|
// manager's own re-notify cadence.
|
|
func (n *NotificationManager) enqueueEscalationNotifications(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert) {
|
|
if emailConfig.Enabled {
|
|
configJSON, err := json.Marshal(emailConfig)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to marshal email config for escalation queue")
|
|
} else {
|
|
notif := &QueuedNotification{
|
|
Type: "email" + queueTypeSuffixEscalation,
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := n.queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Msg("Failed to enqueue escalation email - falling back to direct send")
|
|
go n.sendGroupedEmail(emailConfig, alertsToSend)
|
|
}
|
|
}
|
|
}
|
|
|
|
for _, webhook := range webhooks {
|
|
if webhook.Enabled {
|
|
configJSON, err := json.Marshal(webhook)
|
|
if err != nil {
|
|
log.Error().Err(err).Str("webhookName", webhook.Name).Msg("Failed to marshal webhook config for escalation queue")
|
|
} else {
|
|
notif := &QueuedNotification{
|
|
Type: "webhook" + queueTypeSuffixEscalation,
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := n.queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Str("webhookName", webhook.Name).Msg("Failed to enqueue escalation webhook - falling back to direct send")
|
|
go n.sendGroupedWebhook(webhook, alertsToSend)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if appriseConfig.Enabled {
|
|
configJSON, err := json.Marshal(appriseConfig)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to marshal apprise config for escalation queue")
|
|
} else {
|
|
notif := &QueuedNotification{
|
|
Type: "apprise" + queueTypeSuffixEscalation,
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := n.queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Msg("Failed to enqueue escalation apprise - falling back to direct send")
|
|
go n.sendGroupedApprise(appriseConfig, alertsToSend)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// enqueueResolvedNotifications adds resolved notifications to the persistent queue.
|
|
func (n *NotificationManager) enqueueResolvedNotifications(queue *NotificationQueue, emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert, resolvedAt time.Time) {
|
|
if queue == nil {
|
|
return
|
|
}
|
|
|
|
anyFailed := false
|
|
|
|
if emailConfig.Enabled {
|
|
configJSON, err := json.Marshal(emailConfig)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to marshal email config for resolved queue")
|
|
} else {
|
|
notif := &QueuedNotification{
|
|
Type: "email" + queueTypeSuffixResolved,
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Msg("Failed to enqueue resolved email notification - falling back to direct send")
|
|
anyFailed = true
|
|
go n.sendResolvedEmail(emailConfig, alertsToSend, resolvedAt)
|
|
} else {
|
|
log.Debug().Int("alertCount", len(alertsToSend)).Msg("Enqueued resolved email notification")
|
|
}
|
|
}
|
|
}
|
|
|
|
for _, webhook := range webhooks {
|
|
if !webhook.Enabled {
|
|
continue
|
|
}
|
|
webhookCopy := webhook
|
|
configJSON, err := json.Marshal(webhookCopy)
|
|
if err != nil {
|
|
log.Error().Err(err).Str("webhookName", webhookCopy.Name).Msg("Failed to marshal webhook config for resolved queue")
|
|
continue
|
|
}
|
|
notif := &QueuedNotification{
|
|
Type: "webhook" + queueTypeSuffixResolved,
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Str("webhookName", webhookCopy.Name).Msg("Failed to enqueue resolved webhook notification - falling back to direct send")
|
|
anyFailed = true
|
|
go n.sendResolvedWebhook(webhookCopy, alertsToSend, resolvedAt)
|
|
} else {
|
|
log.Debug().Str("webhookName", webhookCopy.Name).Int("alertCount", len(alertsToSend)).Msg("Enqueued resolved webhook notification")
|
|
}
|
|
}
|
|
|
|
if appriseConfig.Enabled {
|
|
configJSON, err := json.Marshal(appriseConfig)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to marshal apprise config for resolved queue")
|
|
} else {
|
|
notif := &QueuedNotification{
|
|
Type: "apprise" + queueTypeSuffixResolved,
|
|
Alerts: alertsToSend,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := queue.Enqueue(notif); err != nil {
|
|
log.Error().Err(err).Msg("Failed to enqueue resolved Apprise notification - falling back to direct send")
|
|
anyFailed = true
|
|
go n.sendResolvedApprise(appriseConfig, alertsToSend, resolvedAt)
|
|
} else {
|
|
log.Debug().Int("alertCount", len(alertsToSend)).Msg("Enqueued resolved Apprise notification")
|
|
}
|
|
}
|
|
}
|
|
|
|
if anyFailed {
|
|
log.Debug().Msg("At least one resolved notification enqueue failed; direct sends were triggered")
|
|
}
|
|
}
|
|
|
|
// sendNotificationsDirect sends notifications without using the queue (fallback)
|
|
func (n *NotificationManager) sendNotificationsDirect(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert) {
|
|
// Send notifications using the captured snapshots outside the lock to avoid blocking writers
|
|
if emailConfig.Enabled {
|
|
log.Info().
|
|
Int("alertCount", len(alertsToSend)).
|
|
Str("smtpHost", emailConfig.SMTPHost).
|
|
Int("smtpPort", emailConfig.SMTPPort).
|
|
Strs("recipients", emailConfig.To).
|
|
Bool("hasAuth", emailConfig.Username != "" && emailConfig.Password != "").
|
|
Msg("Email notifications enabled - sending grouped email")
|
|
go n.sendGroupedEmail(emailConfig, alertsToSend)
|
|
} else {
|
|
log.Debug().
|
|
Int("alertCount", len(alertsToSend)).
|
|
Msg("Email notifications disabled - skipping email delivery")
|
|
}
|
|
|
|
for _, webhook := range webhooks {
|
|
if webhook.Enabled {
|
|
go n.sendGroupedWebhook(webhook, alertsToSend)
|
|
}
|
|
}
|
|
|
|
if appriseConfig.Enabled {
|
|
go n.sendGroupedApprise(appriseConfig, alertsToSend)
|
|
}
|
|
}
|
|
|
|
// sendResolvedNotificationsDirect delivers resolved notifications without queue persistence.
|
|
func (n *NotificationManager) sendResolvedNotificationsDirect(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert, resolvedAt time.Time) {
|
|
if len(alertsToSend) == 0 {
|
|
return
|
|
}
|
|
|
|
if emailConfig.Enabled {
|
|
go func() {
|
|
if err := n.sendResolvedEmail(emailConfig, alertsToSend, resolvedAt); err != nil {
|
|
log.Error().Err(err).Msg("Failed to send resolved email notification")
|
|
}
|
|
}()
|
|
}
|
|
|
|
for _, webhook := range webhooks {
|
|
if !webhook.Enabled {
|
|
continue
|
|
}
|
|
webhookCopy := webhook
|
|
go func() {
|
|
if err := n.sendResolvedWebhook(webhookCopy, alertsToSend, resolvedAt); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhookName", webhookCopy.Name).
|
|
Msg("Failed to send resolved webhook notification")
|
|
}
|
|
}()
|
|
}
|
|
|
|
if appriseConfig.Enabled {
|
|
go func() {
|
|
if err := n.sendResolvedApprise(appriseConfig, alertsToSend, resolvedAt); err != nil {
|
|
log.Error().Err(err).Msg("Failed to send resolved Apprise notification")
|
|
}
|
|
}()
|
|
}
|
|
}
|
|
|
|
// sendGroupedEmail sends a grouped email notification
|
|
func (n *NotificationManager) sendGroupedEmail(config EmailConfig, alertList []*alerts.Alert) error {
|
|
|
|
// Don't check for recipients here - sendHTMLEmail handles empty recipients
|
|
// by using the From address as the recipient
|
|
|
|
// Generate email using template
|
|
subject, htmlBody, textBody := EmailTemplate(alertList, false)
|
|
|
|
// Send using HTML-aware method
|
|
return n.sendHTMLEmailWithError(subject, htmlBody, textBody, config)
|
|
}
|
|
|
|
func (n *NotificationManager) sendResolvedEmail(config EmailConfig, alertList []*alerts.Alert, resolvedAt time.Time) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
subject, htmlBody, textBody := buildResolvedNotificationContent(alertList, resolvedAt, n.publicURL)
|
|
if subject == "" && textBody == "" {
|
|
return fmt.Errorf("failed to build resolved email content")
|
|
}
|
|
|
|
return n.sendHTMLEmailWithError(subject, htmlBody, textBody, config)
|
|
}
|
|
|
|
func (n *NotificationManager) sendGroupedApprise(config AppriseConfig, alertList []*alerts.Alert) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
cfg := NormalizeAppriseConfig(config)
|
|
if !cfg.Enabled {
|
|
return fmt.Errorf("apprise not enabled")
|
|
}
|
|
|
|
title, body, notifyType := buildApprisePayload(alertList, n.publicURL)
|
|
if title == "" && body == "" {
|
|
return fmt.Errorf("failed to build apprise payload")
|
|
}
|
|
|
|
switch cfg.Mode {
|
|
case AppriseModeHTTP:
|
|
if err := n.sendAppriseViaHTTP(cfg, title, body, notifyType); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("serverUrl", cfg.ServerURL).
|
|
Msg("Failed to send Apprise notification via API")
|
|
return fmt.Errorf("apprise HTTP send failed: %w", err)
|
|
}
|
|
default:
|
|
if err := n.sendAppriseViaCLI(cfg, title, body); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("cliPath", cfg.CLIPath).
|
|
Strs("targets", cfg.Targets).
|
|
Msg("Failed to send Apprise notification")
|
|
return fmt.Errorf("apprise CLI send failed: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func buildApprisePayload(alertList []*alerts.Alert, publicURL string) (string, string, string) {
|
|
validAlerts := make([]*alerts.Alert, 0, len(alertList))
|
|
var primary *alerts.Alert
|
|
for _, alert := range alertList {
|
|
if alert == nil {
|
|
continue
|
|
}
|
|
if primary == nil {
|
|
primary = alert
|
|
}
|
|
validAlerts = append(validAlerts, alert)
|
|
}
|
|
|
|
if len(validAlerts) == 0 || primary == nil {
|
|
return "", "", "info"
|
|
}
|
|
|
|
title := fmt.Sprintf("Pulse alert: %s", primary.ResourceName)
|
|
if len(validAlerts) > 1 {
|
|
title = fmt.Sprintf("Pulse alerts (%d)", len(validAlerts))
|
|
}
|
|
|
|
var bodyBuilder strings.Builder
|
|
bodyBuilder.WriteString(primary.Message)
|
|
bodyBuilder.WriteString("\n\n")
|
|
|
|
for _, alert := range validAlerts {
|
|
bodyBuilder.WriteString(fmt.Sprintf("[%s] %s", strings.ToUpper(string(alert.Level)), alert.ResourceName))
|
|
bodyBuilder.WriteString(fmt.Sprintf(" — value %.2f (threshold %.2f)\n", alert.Value, alert.Threshold))
|
|
if alert.Node != "" {
|
|
bodyBuilder.WriteString(fmt.Sprintf("Node: %s\n", alertNodeDisplay(alert)))
|
|
}
|
|
if alert.Instance != "" && alert.Instance != alert.Node {
|
|
bodyBuilder.WriteString(fmt.Sprintf("Instance: %s\n", alert.Instance))
|
|
}
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
|
|
if publicURL != "" {
|
|
bodyBuilder.WriteString("Dashboard: " + publicURL + "\n")
|
|
}
|
|
|
|
return title, bodyBuilder.String(), resolveAppriseNotificationType(validAlerts)
|
|
}
|
|
|
|
func buildResolvedNotificationContent(alertList []*alerts.Alert, resolvedAt time.Time, publicURL string) (string, string, string) {
|
|
validAlerts := make([]*alerts.Alert, 0, len(alertList))
|
|
var primary *alerts.Alert
|
|
for _, alert := range alertList {
|
|
if alert == nil {
|
|
continue
|
|
}
|
|
if primary == nil {
|
|
primary = alert
|
|
}
|
|
validAlerts = append(validAlerts, alert)
|
|
}
|
|
|
|
if len(validAlerts) == 0 || primary == nil {
|
|
return "", "", ""
|
|
}
|
|
|
|
if resolvedAt.IsZero() {
|
|
resolvedAt = time.Now()
|
|
}
|
|
resolvedLabel := resolvedAt.Format(time.RFC3339)
|
|
|
|
title := fmt.Sprintf("Pulse alert resolved: %s", primary.ResourceName)
|
|
if len(validAlerts) > 1 {
|
|
title = fmt.Sprintf("Pulse alerts resolved (%d)", len(validAlerts))
|
|
}
|
|
|
|
var bodyBuilder strings.Builder
|
|
bodyBuilder.WriteString("Resolved at ")
|
|
bodyBuilder.WriteString(resolvedLabel)
|
|
bodyBuilder.WriteString("\n\n")
|
|
|
|
for _, alert := range validAlerts {
|
|
bodyBuilder.WriteString(fmt.Sprintf("[%s] %s\n", strings.ToUpper(string(alert.Level)), alert.ResourceName))
|
|
if alert.Message != "" {
|
|
bodyBuilder.WriteString(alert.Message)
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
if !alert.StartTime.IsZero() {
|
|
bodyBuilder.WriteString("Started: ")
|
|
bodyBuilder.WriteString(alert.StartTime.Format(time.RFC3339))
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
bodyBuilder.WriteString("Cleared: ")
|
|
bodyBuilder.WriteString(resolvedLabel)
|
|
bodyBuilder.WriteString("\n")
|
|
if alert.Node != "" {
|
|
bodyBuilder.WriteString("Node: ")
|
|
bodyBuilder.WriteString(alertNodeDisplay(alert))
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
if alert.Instance != "" && alert.Instance != alert.Node {
|
|
bodyBuilder.WriteString("Instance: ")
|
|
bodyBuilder.WriteString(alert.Instance)
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
if alert.Threshold != 0 || alert.Value != 0 {
|
|
bodyBuilder.WriteString(fmt.Sprintf("Last value %.2f (threshold %.2f)\n", alert.Value, alert.Threshold))
|
|
}
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
|
|
if publicURL != "" {
|
|
bodyBuilder.WriteString("Dashboard: ")
|
|
bodyBuilder.WriteString(publicURL)
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
|
|
textBody := bodyBuilder.String()
|
|
htmlBody := "<pre style=\"font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, \\\"Liberation Mono\\\", \\\"Courier New\\\", monospace\">" +
|
|
html.EscapeString(textBody) + "</pre>"
|
|
|
|
return title, htmlBody, textBody
|
|
}
|
|
|
|
func resolveAppriseNotificationType(alertList []*alerts.Alert) string {
|
|
notifyType := "info"
|
|
for _, alert := range alertList {
|
|
if alert == nil {
|
|
continue
|
|
}
|
|
switch alert.Level {
|
|
case alerts.AlertLevelCritical:
|
|
return "failure"
|
|
case alerts.AlertLevelWarning:
|
|
notifyType = "warning"
|
|
}
|
|
}
|
|
return notifyType
|
|
}
|
|
|
|
func (n *NotificationManager) sendAppriseViaCLI(cfg AppriseConfig, title, body string) error {
|
|
if len(cfg.Targets) == 0 {
|
|
return fmt.Errorf("no Apprise targets configured for CLI delivery")
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(cfg.TimeoutSeconds)*time.Second)
|
|
defer cancel()
|
|
|
|
args := []string{"-t", title, "-b", body}
|
|
args = append(args, cfg.Targets...)
|
|
|
|
execFn := n.appriseExec
|
|
if execFn == nil {
|
|
execFn = defaultAppriseExec
|
|
}
|
|
|
|
output, err := execFn(ctx, args)
|
|
if err != nil {
|
|
if len(output) > 0 {
|
|
log.Debug().
|
|
Str("cliPath", cfg.CLIPath).
|
|
Strs("targets", cfg.Targets).
|
|
Str("output", string(output)).
|
|
Msg("Apprise CLI output (error)")
|
|
}
|
|
return err
|
|
}
|
|
|
|
if len(output) > 0 {
|
|
log.Debug().
|
|
Str("cliPath", cfg.CLIPath).
|
|
Strs("targets", cfg.Targets).
|
|
Str("output", string(output)).
|
|
Msg("Apprise CLI output")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (n *NotificationManager) sendAppriseViaHTTP(cfg AppriseConfig, title, body, notifyType string) error {
|
|
if cfg.ServerURL == "" {
|
|
return fmt.Errorf("apprise server URL is not configured")
|
|
}
|
|
|
|
serverURL := cfg.ServerURL
|
|
lowerURL := strings.ToLower(serverURL)
|
|
if !strings.HasPrefix(lowerURL, "http://") && !strings.HasPrefix(lowerURL, "https://") {
|
|
return fmt.Errorf("apprise server URL must start with http or https: %s", serverURL)
|
|
}
|
|
|
|
// Validate Apprise server URL to prevent SSRF
|
|
if err := n.ValidateWebhookURL(serverURL); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("serverURL", serverURL).
|
|
Msg("Apprise server URL validation failed - possible SSRF attempt")
|
|
return fmt.Errorf("apprise server URL validation failed: %w", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(cfg.TimeoutSeconds)*time.Second)
|
|
defer cancel()
|
|
|
|
notifyEndpoint := "/notify"
|
|
if cfg.ConfigKey != "" {
|
|
notifyEndpoint = "/notify/" + url.PathEscape(cfg.ConfigKey)
|
|
}
|
|
|
|
requestURL := strings.TrimRight(serverURL, "/") + notifyEndpoint
|
|
|
|
payload := map[string]any{
|
|
"body": body,
|
|
"title": title,
|
|
}
|
|
if len(cfg.Targets) > 0 {
|
|
payload["urls"] = cfg.Targets
|
|
}
|
|
if notifyType != "" {
|
|
payload["type"] = notifyType
|
|
}
|
|
|
|
payloadBytes, err := json.Marshal(payload)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal Apprise payload: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL, bytes.NewReader(payloadBytes))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create Apprise request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Accept", "application/json")
|
|
|
|
if cfg.APIKey != "" {
|
|
if cfg.APIKeyHeader == "" {
|
|
req.Header.Set("X-API-KEY", cfg.APIKey)
|
|
} else {
|
|
req.Header.Set(cfg.APIKeyHeader, cfg.APIKey)
|
|
}
|
|
}
|
|
|
|
client := &http.Client{
|
|
Timeout: time.Duration(cfg.TimeoutSeconds) * time.Second,
|
|
}
|
|
|
|
if strings.HasPrefix(lowerURL, "https://") && cfg.SkipTLSVerify {
|
|
client.Transport = &http.Transport{
|
|
Proxy: http.ProxyFromEnvironment,
|
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
|
}
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to reach Apprise server: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
limited := io.LimitReader(resp.Body, WebhookMaxResponseSize)
|
|
respBody, _ := io.ReadAll(limited)
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
if len(respBody) > 0 {
|
|
return fmt.Errorf("apprise server returned HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
|
|
}
|
|
return fmt.Errorf("apprise server returned HTTP %d", resp.StatusCode)
|
|
}
|
|
|
|
if len(respBody) > 0 {
|
|
log.Debug().
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("serverUrl", cfg.ServerURL).
|
|
Str("response", string(respBody)).
|
|
Msg("Apprise API response")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (n *NotificationManager) sendResolvedApprise(config AppriseConfig, alertList []*alerts.Alert, resolvedAt time.Time) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
cfg := NormalizeAppriseConfig(config)
|
|
if !cfg.Enabled {
|
|
return fmt.Errorf("apprise not enabled")
|
|
}
|
|
|
|
title, _, body := buildResolvedNotificationContent(alertList, resolvedAt, n.publicURL)
|
|
if title == "" && body == "" {
|
|
return fmt.Errorf("failed to build resolved apprise payload")
|
|
}
|
|
|
|
switch cfg.Mode {
|
|
case AppriseModeHTTP:
|
|
if err := n.sendAppriseViaHTTP(cfg, title, body, "info"); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("serverUrl", cfg.ServerURL).
|
|
Msg("Failed to send resolved Apprise notification via API")
|
|
return fmt.Errorf("apprise HTTP send failed: %w", err)
|
|
}
|
|
default:
|
|
if err := n.sendAppriseViaCLI(cfg, title, body); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("cliPath", cfg.CLIPath).
|
|
Strs("targets", cfg.Targets).
|
|
Msg("Failed to send resolved Apprise notification")
|
|
return fmt.Errorf("apprise CLI send failed: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// sendEmail sends an email notification
|
|
func (n *NotificationManager) sendEmail(alert *alerts.Alert) {
|
|
n.mu.RLock()
|
|
config := n.emailConfig
|
|
n.mu.RUnlock()
|
|
|
|
// Don't check for recipients here - sendHTMLEmail handles empty recipients
|
|
// by using the From address as the recipient
|
|
|
|
// Generate email using template
|
|
subject, htmlBody, textBody := EmailTemplate([]*alerts.Alert{alert}, true)
|
|
|
|
// Send using HTML-aware method
|
|
n.sendHTMLEmail(subject, htmlBody, textBody, config)
|
|
}
|
|
|
|
// sendHTMLEmailWithError sends an HTML email with multipart content and returns any error
|
|
func (n *NotificationManager) sendHTMLEmailWithError(subject, htmlBody, textBody string, config EmailConfig) error {
|
|
// Use From address as recipient if To is empty
|
|
recipients := config.To
|
|
if len(recipients) == 0 && config.From != "" {
|
|
recipients = []string{config.From}
|
|
log.Info().
|
|
Str("from", config.From).
|
|
Msg("Using From address as recipient since To is empty")
|
|
}
|
|
|
|
// Use shared email manager for rate limiting, or create a new one if not available
|
|
n.mu.RLock()
|
|
manager := n.emailManager
|
|
n.mu.RUnlock()
|
|
|
|
if manager == nil {
|
|
// Create email manager if not yet initialized
|
|
rl := config.RateLimit
|
|
if rl <= 0 {
|
|
rl = 60
|
|
}
|
|
enhancedConfig := EmailProviderConfig{
|
|
EmailConfig: EmailConfig{
|
|
From: config.From,
|
|
To: recipients,
|
|
SMTPHost: config.SMTPHost,
|
|
SMTPPort: config.SMTPPort,
|
|
Username: config.Username,
|
|
Password: config.Password,
|
|
},
|
|
Provider: config.Provider,
|
|
StartTLS: config.StartTLS,
|
|
MaxRetries: 2,
|
|
RetryDelay: 3,
|
|
RateLimit: rl,
|
|
SkipTLSVerify: false,
|
|
AuthRequired: config.Username != "" && config.Password != "",
|
|
}
|
|
manager = NewEnhancedEmailManager(enhancedConfig)
|
|
} else {
|
|
// Update manager's config but preserve rate limiter
|
|
manager.config.EmailConfig.From = config.From
|
|
manager.config.EmailConfig.To = recipients
|
|
}
|
|
|
|
log.Info().
|
|
Str("smtp", fmt.Sprintf("%s:%d", config.SMTPHost, config.SMTPPort)).
|
|
Str("from", config.From).
|
|
Strs("to", recipients).
|
|
Bool("hasAuth", config.Username != "" && config.Password != "").
|
|
Bool("startTLS", manager.config.StartTLS).
|
|
Msg("Attempting to send email via SMTP with enhanced support")
|
|
|
|
err := manager.SendEmailWithRetry(subject, htmlBody, textBody)
|
|
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("smtp", fmt.Sprintf("%s:%d", config.SMTPHost, config.SMTPPort)).
|
|
Strs("recipients", recipients).
|
|
Msg("Failed to send email notification")
|
|
return fmt.Errorf("failed to send email: %w", err)
|
|
}
|
|
|
|
log.Info().
|
|
Strs("recipients", recipients).
|
|
Int("recipientCount", len(recipients)).
|
|
Msg("Email notification sent successfully")
|
|
return nil
|
|
}
|
|
|
|
// sendHTMLEmail sends an HTML email with multipart content
|
|
func (n *NotificationManager) sendHTMLEmail(subject, htmlBody, textBody string, config EmailConfig) {
|
|
// Use From address as recipient if To is empty
|
|
recipients := config.To
|
|
if len(recipients) == 0 && config.From != "" {
|
|
recipients = []string{config.From}
|
|
log.Info().
|
|
Str("from", config.From).
|
|
Msg("Using From address as recipient since To is empty")
|
|
}
|
|
|
|
// Create enhanced email configuration with proper STARTTLS support
|
|
rl := config.RateLimit
|
|
if rl <= 0 {
|
|
rl = 60
|
|
}
|
|
enhancedConfig := EmailProviderConfig{
|
|
EmailConfig: EmailConfig{
|
|
From: config.From,
|
|
To: recipients,
|
|
SMTPHost: config.SMTPHost,
|
|
SMTPPort: config.SMTPPort,
|
|
Username: config.Username,
|
|
Password: config.Password,
|
|
},
|
|
Provider: config.Provider,
|
|
StartTLS: config.StartTLS, // Use the configured StartTLS setting
|
|
MaxRetries: 2,
|
|
RetryDelay: 3,
|
|
RateLimit: rl,
|
|
SkipTLSVerify: false,
|
|
AuthRequired: config.Username != "" && config.Password != "",
|
|
}
|
|
|
|
// Use enhanced email manager for better compatibility
|
|
enhancedManager := NewEnhancedEmailManager(enhancedConfig)
|
|
|
|
log.Info().
|
|
Str("smtp", fmt.Sprintf("%s:%d", config.SMTPHost, config.SMTPPort)).
|
|
Str("from", config.From).
|
|
Strs("to", recipients).
|
|
Bool("hasAuth", config.Username != "" && config.Password != "").
|
|
Bool("startTLS", enhancedConfig.StartTLS).
|
|
Msg("Attempting to send email via SMTP with enhanced support")
|
|
|
|
err := enhancedManager.SendEmailWithRetry(subject, htmlBody, textBody)
|
|
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("smtp", fmt.Sprintf("%s:%d", config.SMTPHost, config.SMTPPort)).
|
|
Strs("recipients", recipients).
|
|
Msg("Failed to send email notification")
|
|
} else {
|
|
log.Info().
|
|
Strs("recipients", recipients).
|
|
Int("recipientCount", len(recipients)).
|
|
Msg("Email notification sent successfully")
|
|
}
|
|
}
|
|
|
|
// sendGroupedWebhook sends a grouped webhook notification
|
|
func (n *NotificationManager) sendGroupedWebhook(webhook WebhookConfig, alertList []*alerts.Alert) error {
|
|
var jsonData []byte
|
|
var err error
|
|
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
// Create a shallow copy of the primary alert to avoid mutating the original memory
|
|
// when we modify the message for grouped summaries.
|
|
originalPrimary := alertList[0]
|
|
alertCopy := *originalPrimary
|
|
primaryAlert := &alertCopy
|
|
customFields := convertWebhookCustomFields(webhook.CustomFields)
|
|
|
|
var templateData WebhookPayloadData
|
|
var dataPrepared bool
|
|
var urlRendered bool
|
|
var serviceDataApplied bool
|
|
|
|
prepareData := func() *WebhookPayloadData {
|
|
if !dataPrepared {
|
|
prepared := n.prepareWebhookData(primaryAlert, customFields)
|
|
prepared.AlertCount = len(alertList)
|
|
prepared.Alerts = alertList
|
|
prepared.Mention = webhook.Mention
|
|
templateData = prepared
|
|
dataPrepared = true
|
|
}
|
|
return &templateData
|
|
}
|
|
|
|
ensureURLAndServiceData := func() (*WebhookPayloadData, bool) {
|
|
dataPtr := prepareData()
|
|
|
|
if !urlRendered {
|
|
rendered, renderErr := renderWebhookURL(webhook.URL, *dataPtr)
|
|
if renderErr != nil {
|
|
log.Error().
|
|
Err(renderErr).
|
|
Str("webhook", webhook.Name).
|
|
Msg("Failed to render webhook URL template for grouped notification")
|
|
return nil, false
|
|
}
|
|
webhook.URL = rendered
|
|
urlRendered = true
|
|
}
|
|
|
|
if !serviceDataApplied {
|
|
switch webhook.Service {
|
|
case "telegram":
|
|
chatID, chatErr := extractTelegramChatID(webhook.URL)
|
|
if chatErr != nil {
|
|
log.Error().
|
|
Err(chatErr).
|
|
Str("webhook", webhook.Name).
|
|
Msg("Failed to extract Telegram chat_id for grouped notification")
|
|
return nil, false
|
|
}
|
|
if chatID != "" {
|
|
dataPtr.ChatID = chatID
|
|
log.Debug().
|
|
Str("webhook", webhook.Name).
|
|
Str("chatID", chatID).
|
|
Msg("Extracted Telegram chat_id from rendered URL for grouped notification")
|
|
}
|
|
case "pagerduty":
|
|
if dataPtr.CustomFields == nil {
|
|
dataPtr.CustomFields = make(map[string]interface{})
|
|
}
|
|
if routingKey, ok := webhook.Headers["routing_key"]; ok {
|
|
dataPtr.CustomFields["routing_key"] = routingKey
|
|
}
|
|
case "pushover":
|
|
dataPtr.CustomFields = ensurePushoverCustomFieldAliases(dataPtr.CustomFields)
|
|
}
|
|
serviceDataApplied = true
|
|
}
|
|
|
|
return dataPtr, true
|
|
}
|
|
|
|
// Check if webhook has a custom template first
|
|
// Only use custom template if it's not empty
|
|
if webhook.Template != "" && strings.TrimSpace(webhook.Template) != "" && len(alertList) > 0 {
|
|
// Use custom template with enhanced message for grouped alerts
|
|
alert := primaryAlert
|
|
if len(alertList) > 1 {
|
|
// Build a full list of all alerts
|
|
summary := alert.Message
|
|
otherAlerts := []string{}
|
|
for i := 1; i < len(alertList); i++ { // Show ALL alerts
|
|
otherAlerts = append(otherAlerts, fmt.Sprintf("• %s: %.1f%%", alertList[i].ResourceName, alertList[i].Value))
|
|
}
|
|
if len(otherAlerts) > 0 {
|
|
// For custom templates, we need to escape newlines since they're likely
|
|
// used in shell commands or other contexts that need escaping
|
|
alert.Message = fmt.Sprintf("%s\\n\\nAll %d alerts:\\n%s", summary, len(alertList), strings.Join(otherAlerts, "\\n"))
|
|
}
|
|
}
|
|
|
|
enhanced := EnhancedWebhookConfig{
|
|
WebhookConfig: webhook,
|
|
Service: webhook.Service,
|
|
PayloadTemplate: webhook.Template,
|
|
CustomFields: customFields,
|
|
}
|
|
|
|
if dataPtr, ok := ensureURLAndServiceData(); ok {
|
|
jsonData, err = n.generatePayloadFromTemplateWithService(enhanced.PayloadTemplate, *dataPtr, webhook.Service)
|
|
} else {
|
|
return fmt.Errorf("failed to prepare webhook URL and service data")
|
|
}
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Int("alertCount", len(alertList)).
|
|
Msg("Failed to generate grouped payload from custom template")
|
|
return fmt.Errorf("failed to generate payload from custom template: %w", err)
|
|
}
|
|
} else if webhook.Service != "" && webhook.Service != "generic" && len(alertList) > 0 {
|
|
// For service-specific webhooks, use the first alert with a note about others
|
|
// For simplicity, send the first alert with a note about others
|
|
// Most webhook services work better with single structured payloads
|
|
alert := primaryAlert
|
|
|
|
enhanced := EnhancedWebhookConfig{
|
|
WebhookConfig: webhook,
|
|
Service: webhook.Service,
|
|
CustomFields: customFields,
|
|
}
|
|
|
|
// Get service template
|
|
templates := GetWebhookTemplates()
|
|
templateFound := false
|
|
for _, tmpl := range templates {
|
|
if tmpl.Service == webhook.Service {
|
|
enhanced.PayloadTemplate = tmpl.PayloadTemplate
|
|
templateFound = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if templateFound {
|
|
// Modify message if multiple alerts - but format differently for Discord
|
|
if len(alertList) > 1 {
|
|
summary := alert.Message
|
|
otherAlerts := []string{}
|
|
for i := 1; i < len(alertList); i++ {
|
|
otherAlerts = append(otherAlerts, fmt.Sprintf("• %s: %.1f%%", alertList[i].ResourceName, alertList[i].Value))
|
|
}
|
|
if len(otherAlerts) > 0 {
|
|
// For Discord, format as a single line list to avoid newline issues
|
|
// Discord embeds don't render \n in description anyway
|
|
if webhook.Service == "discord" {
|
|
// Use comma-separated list for Discord
|
|
alert.Message = fmt.Sprintf("%s | %d alerts: %s", summary, len(alertList), strings.Join(otherAlerts, ", "))
|
|
} else {
|
|
// For other services, escape newlines properly
|
|
alert.Message = fmt.Sprintf("%s\\n\\nAll %d alerts:\\n%s", summary, len(alertList), strings.Join(otherAlerts, "\\n"))
|
|
}
|
|
}
|
|
}
|
|
|
|
if dataPtr, ok := ensureURLAndServiceData(); ok {
|
|
jsonData, err = n.generatePayloadFromTemplateWithService(enhanced.PayloadTemplate, *dataPtr, webhook.Service)
|
|
} else {
|
|
return fmt.Errorf("failed to prepare webhook URL and service data")
|
|
}
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Int("alertCount", len(alertList)).
|
|
Msg("Failed to generate payload for grouped alerts")
|
|
return fmt.Errorf("failed to generate payload for grouped alerts: %w", err)
|
|
}
|
|
} else {
|
|
// No template found, use generic payload
|
|
webhook.Service = "generic"
|
|
}
|
|
}
|
|
|
|
// Use generic payload if no service or template not found
|
|
// But ONLY if jsonData hasn't been set yet (from custom template)
|
|
if jsonData == nil && (webhook.Service == "" || webhook.Service == "generic") {
|
|
if _, ok := ensureURLAndServiceData(); !ok {
|
|
return fmt.Errorf("failed to prepare webhook URL and service data")
|
|
}
|
|
|
|
// Use generic payload for other services
|
|
payload := map[string]interface{}{
|
|
"alerts": alertList,
|
|
"count": len(alertList),
|
|
"timestamp": time.Now().Unix(),
|
|
"source": "pulse-monitoring",
|
|
"grouped": true,
|
|
}
|
|
|
|
jsonData, err = json.Marshal(payload)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Int("alertCount", len(alertList)).
|
|
Msg("Failed to marshal grouped webhook payload")
|
|
return fmt.Errorf("failed to marshal grouped webhook payload: %w", err)
|
|
}
|
|
}
|
|
|
|
if _, ok := ensureURLAndServiceData(); !ok {
|
|
return fmt.Errorf("failed to prepare webhook URL and service data")
|
|
}
|
|
|
|
// Send using same request logic
|
|
return n.sendWebhookRequest(webhook, jsonData, "grouped")
|
|
}
|
|
|
|
func (n *NotificationManager) sendResolvedWebhook(webhook WebhookConfig, alertList []*alerts.Alert, resolvedAt time.Time) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
if !webhook.Enabled {
|
|
return fmt.Errorf("webhook is disabled")
|
|
}
|
|
|
|
if resolvedAt.IsZero() {
|
|
resolvedAt = time.Now()
|
|
}
|
|
|
|
// ntfy needs plain-text body + headers, not JSON
|
|
if webhook.Service == "ntfy" {
|
|
return n.sendResolvedWebhookNtfy(webhook, alertList, resolvedAt)
|
|
}
|
|
|
|
// Use custom or service-specific templates for resolved webhooks (mirrors sendWebhook logic).
|
|
// Custom template takes precedence, then service template.
|
|
// For service webhooks, return an error on failure rather than sending a generic payload
|
|
// that the service endpoint would reject anyway.
|
|
hasCustomTemplate := webhook.Template != "" && strings.TrimSpace(webhook.Template) != ""
|
|
hasServiceTemplate := webhook.Service != "" && webhook.Service != "generic"
|
|
|
|
if hasCustomTemplate || hasServiceTemplate {
|
|
alert := alertList[0]
|
|
if alert == nil {
|
|
return fmt.Errorf("first alert in resolved list is nil for service webhook %s", webhook.Name)
|
|
}
|
|
|
|
data := n.prepareResolvedWebhookData(alert, webhook, resolvedAt)
|
|
data.AlertCount = len(alertList)
|
|
if len(alertList) > 1 {
|
|
data.Message = fmt.Sprintf("Resolved: %d alerts cleared", len(alertList))
|
|
}
|
|
data.Alerts = alertList
|
|
|
|
// Render URL template if placeholders are present
|
|
renderedURL, renderErr := renderWebhookURL(webhook.URL, data)
|
|
if renderErr != nil {
|
|
return fmt.Errorf("failed to render resolved webhook URL template for %s: %w", webhook.Name, renderErr)
|
|
}
|
|
webhook.URL = renderedURL
|
|
|
|
// Service-specific data enrichment (mirrors sendWebhook logic)
|
|
if webhook.Service == "telegram" {
|
|
chatID, chatErr := extractTelegramChatID(renderedURL)
|
|
if chatErr != nil {
|
|
return fmt.Errorf("failed to extract Telegram chat_id for resolved webhook %s: %w", webhook.Name, chatErr)
|
|
}
|
|
if chatID != "" {
|
|
data.ChatID = chatID
|
|
}
|
|
} else if webhook.Service == "pagerduty" {
|
|
if data.CustomFields == nil {
|
|
data.CustomFields = make(map[string]interface{})
|
|
}
|
|
if routingKey, ok := webhook.Headers["routing_key"]; ok {
|
|
data.CustomFields["routing_key"] = routingKey
|
|
}
|
|
}
|
|
|
|
// Try custom template first, then service template
|
|
if hasCustomTemplate {
|
|
jsonData, err := n.generatePayloadFromTemplateWithService(webhook.Template, data, webhook.Service)
|
|
if err == nil {
|
|
return n.sendWebhookRequest(webhook, jsonData, "resolved")
|
|
}
|
|
log.Warn().Err(err).Str("webhook", webhook.Name).Msg("Failed to render resolved custom template, trying service template")
|
|
// Fall through to service template if available
|
|
}
|
|
|
|
if hasServiceTemplate {
|
|
templates := GetWebhookTemplates()
|
|
for _, tmpl := range templates {
|
|
if tmpl.Service == webhook.Service && tmpl.ResolvedPayloadTemplate != "" {
|
|
jsonData, err := n.generatePayloadFromTemplateWithService(tmpl.ResolvedPayloadTemplate, data, webhook.Service)
|
|
if err == nil {
|
|
return n.sendWebhookRequest(webhook, jsonData, "resolved")
|
|
}
|
|
return fmt.Errorf("failed to render resolved %s template for %s: %w", webhook.Service, webhook.Name, err)
|
|
}
|
|
}
|
|
// No resolved template for this service — fall through to generic payload
|
|
}
|
|
// Fall through to generic payload below
|
|
}
|
|
|
|
// Generic payload for webhooks with no service and no custom template
|
|
payload := map[string]interface{}{
|
|
"event": string(eventResolved),
|
|
"alerts": alertList,
|
|
"count": len(alertList),
|
|
"resolvedAt": resolvedAt.Unix(),
|
|
"resolvedAtIso": resolvedAt.Format(time.RFC3339),
|
|
"source": "pulse-monitoring",
|
|
}
|
|
|
|
if n.publicURL != "" {
|
|
payload["dashboard"] = n.publicURL
|
|
}
|
|
|
|
if len(alertList) == 1 && alertList[0] != nil {
|
|
payload["alertId"] = alertList[0].ID
|
|
}
|
|
|
|
jsonData, err := json.Marshal(payload)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Int("alertCount", len(alertList)).
|
|
Msg("Failed to marshal resolved webhook payload")
|
|
return fmt.Errorf("failed to marshal resolved webhook payload: %w", err)
|
|
}
|
|
|
|
return n.sendWebhookRequest(webhook, jsonData, "resolved")
|
|
}
|
|
|
|
// sendResolvedWebhookNtfy sends a resolved webhook formatted for ntfy (plain text + headers)
|
|
func (n *NotificationManager) sendResolvedWebhookNtfy(webhook WebhookConfig, alertList []*alerts.Alert, resolvedAt time.Time) error {
|
|
// Re-validate webhook URL
|
|
if err := n.ValidateWebhookURL(webhook.URL); err != nil {
|
|
return fmt.Errorf("webhook URL validation failed: %w", err)
|
|
}
|
|
|
|
if !n.checkWebhookRateLimit(webhook.URL) {
|
|
return fmt.Errorf("rate limit exceeded for webhook %s", webhook.Name)
|
|
}
|
|
|
|
// Build plain-text body
|
|
var body strings.Builder
|
|
if len(alertList) == 1 && alertList[0] != nil {
|
|
a := alertList[0]
|
|
fmt.Fprintf(&body, "Resolved: %s on %s is now healthy", a.ResourceName, a.Node)
|
|
} else {
|
|
fmt.Fprintf(&body, "%d alerts resolved at %s:\n", len(alertList), resolvedAt.Format(time.RFC822))
|
|
for _, a := range alertList {
|
|
if a != nil {
|
|
fmt.Fprintf(&body, "- %s on %s\n", a.ResourceName, a.Node)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Build title
|
|
title := "RESOLVED"
|
|
if len(alertList) == 1 && alertList[0] != nil {
|
|
title = fmt.Sprintf("RESOLVED: %s", alertList[0].ResourceName)
|
|
} else {
|
|
title = fmt.Sprintf("RESOLVED: %d alerts", len(alertList))
|
|
}
|
|
|
|
method := webhook.Method
|
|
if method == "" {
|
|
method = "POST"
|
|
}
|
|
|
|
req, err := http.NewRequest(method, webhook.URL, bytes.NewBufferString(body.String()))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create ntfy request: %w", err)
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "text/plain")
|
|
req.Header.Set("Title", title)
|
|
req.Header.Set("Priority", "default")
|
|
req.Header.Set("Tags", "white_check_mark,pulse,resolved")
|
|
req.Header.Set("User-Agent", "Pulse-Monitoring/2.0")
|
|
|
|
// Apply any custom headers from webhook config
|
|
for key, value := range webhook.Headers {
|
|
if !strings.Contains(value, "{{") {
|
|
req.Header.Set(key, value)
|
|
}
|
|
}
|
|
|
|
resp, err := n.webhookClient.Do(req)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Msg("Failed to send resolved ntfy webhook")
|
|
return fmt.Errorf("failed to send ntfy webhook: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Read response with size limit
|
|
limitedReader := io.LimitReader(resp.Body, WebhookMaxResponseSize)
|
|
var respBody bytes.Buffer
|
|
respBody.ReadFrom(limitedReader)
|
|
|
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
|
log.Info().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", "ntfy").
|
|
Str("type", "resolved").
|
|
Int("status", resp.StatusCode).
|
|
Int("alertCount", len(alertList)).
|
|
Msg("Resolved ntfy webhook sent successfully")
|
|
return nil
|
|
}
|
|
|
|
log.Warn().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", "ntfy").
|
|
Int("status", resp.StatusCode).
|
|
Str("response", respBody.String()).
|
|
Msg("Resolved ntfy webhook returned non-success status")
|
|
return fmt.Errorf("ntfy webhook returned HTTP %d: %s", resp.StatusCode, respBody.String())
|
|
}
|
|
|
|
// checkWebhookRateLimit checks if a webhook can be sent based on rate limits
|
|
func (n *NotificationManager) checkWebhookRateLimit(webhookURL string) bool {
|
|
n.webhookRateMu.Lock()
|
|
defer n.webhookRateMu.Unlock()
|
|
|
|
now := time.Now()
|
|
limit, exists := n.webhookRateLimits[webhookURL]
|
|
|
|
if !exists {
|
|
// First time sending to this webhook
|
|
n.webhookRateLimits[webhookURL] = &webhookRateLimit{
|
|
lastSent: now,
|
|
sentCount: 1,
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Check if we're still in the rate limit window
|
|
if now.Sub(limit.lastSent) > WebhookRateLimitWindow {
|
|
// Window expired, reset counter
|
|
limit.lastSent = now
|
|
limit.sentCount = 1
|
|
return true
|
|
}
|
|
|
|
// Still in window, check if we've exceeded the limit
|
|
if limit.sentCount >= WebhookRateLimitMax {
|
|
log.Warn().
|
|
Str("webhookURL", webhookURL).
|
|
Int("sentCount", limit.sentCount).
|
|
Dur("window", WebhookRateLimitWindow).
|
|
Msg("Webhook rate limit exceeded, dropping request")
|
|
return false
|
|
}
|
|
|
|
// Increment counter and allow
|
|
limit.sentCount++
|
|
return true
|
|
}
|
|
|
|
// sendWebhookRequest sends the actual webhook request
|
|
func (n *NotificationManager) sendWebhookRequest(webhook WebhookConfig, jsonData []byte, alertType string) error {
|
|
// Re-validate webhook URL to prevent DNS rebinding attacks
|
|
if err := n.ValidateWebhookURL(webhook.URL); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("url", webhook.URL).
|
|
Msg("Webhook URL validation failed at send time - possible DNS rebinding")
|
|
return fmt.Errorf("webhook URL validation failed: %w", err)
|
|
}
|
|
|
|
// Check rate limit before sending
|
|
if !n.checkWebhookRateLimit(webhook.URL) {
|
|
log.Warn().
|
|
Str("webhook", webhook.Name).
|
|
Str("url", webhook.URL).
|
|
Msg("Webhook request dropped due to rate limiting")
|
|
return fmt.Errorf("rate limit exceeded for webhook %s", webhook.Name)
|
|
}
|
|
|
|
// Create request
|
|
method := webhook.Method
|
|
if method == "" {
|
|
method = "POST"
|
|
}
|
|
|
|
// For Telegram webhooks, strip chat_id from URL if present
|
|
// The chat_id should only be in the JSON body, not the URL
|
|
webhookURL := webhook.URL
|
|
if webhook.Service == "telegram" && strings.Contains(webhookURL, "chat_id=") {
|
|
if u, err := url.Parse(webhookURL); err == nil {
|
|
q := u.Query()
|
|
q.Del("chat_id") // Remove chat_id from query params
|
|
u.RawQuery = q.Encode()
|
|
webhookURL = u.String()
|
|
log.Debug().
|
|
Str("original", webhook.URL).
|
|
Str("cleaned", webhookURL).
|
|
Msg("Stripped chat_id from Telegram webhook URL")
|
|
}
|
|
}
|
|
|
|
req, err := http.NewRequest(method, webhookURL, bytes.NewBuffer(jsonData))
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("type", alertType).
|
|
Msg("Failed to create webhook request")
|
|
return fmt.Errorf("failed to create webhook request: %w", err)
|
|
}
|
|
|
|
// Set headers
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("User-Agent", "Pulse-Monitoring/2.0")
|
|
|
|
// Special handling for ntfy service
|
|
if webhook.Service == "ntfy" {
|
|
// Set Content-Type for ntfy (plain text)
|
|
req.Header.Set("Content-Type", "text/plain")
|
|
// Note: Dynamic headers for ntfy are set in sendWebhook for individual alerts
|
|
}
|
|
|
|
// Apply any custom headers from webhook config
|
|
for key, value := range webhook.Headers {
|
|
// Skip template-like headers (those with {{) to prevent errors
|
|
if !strings.Contains(value, "{{") {
|
|
req.Header.Set(key, value)
|
|
}
|
|
}
|
|
|
|
// Debug log for Telegram and Gotify webhooks (without secrets)
|
|
if webhook.Service == "telegram" || webhook.Service == "gotify" {
|
|
log.Debug().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", webhook.Service).
|
|
Msg("Sending webhook")
|
|
}
|
|
|
|
// Send request with shared secure client
|
|
resp, err := n.webhookClient.Do(req)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("type", alertType).
|
|
Msg("Failed to send webhook")
|
|
return fmt.Errorf("failed to send webhook: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Read response body with size limit to prevent memory exhaustion
|
|
limitedReader := io.LimitReader(resp.Body, WebhookMaxResponseSize)
|
|
var respBody bytes.Buffer
|
|
bytesRead, err := respBody.ReadFrom(limitedReader)
|
|
if err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("type", alertType).
|
|
Msg("Failed to read webhook response body")
|
|
return fmt.Errorf("failed to read webhook response: %w", err)
|
|
}
|
|
|
|
// Check if we hit the size limit
|
|
if bytesRead >= WebhookMaxResponseSize {
|
|
log.Warn().
|
|
Str("webhook", webhook.Name).
|
|
Int64("bytesRead", bytesRead).
|
|
Int("maxSize", WebhookMaxResponseSize).
|
|
Msg("Webhook response exceeded size limit, truncated")
|
|
}
|
|
|
|
responseBody := respBody.String()
|
|
|
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
|
log.Info().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", webhook.Service).
|
|
Str("type", alertType).
|
|
Int("status", resp.StatusCode).
|
|
Int("payloadSize", len(jsonData)).
|
|
Msg("Webhook notification sent successfully")
|
|
|
|
// Log response body only in debug mode for successful requests
|
|
if len(responseBody) > 0 {
|
|
log.Debug().
|
|
Str("webhook", webhook.Name).
|
|
Str("response", responseBody).
|
|
Msg("Webhook response body")
|
|
}
|
|
return nil
|
|
} else {
|
|
log.Warn().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", webhook.Service).
|
|
Str("type", alertType).
|
|
Int("status", resp.StatusCode).
|
|
Str("response", responseBody).
|
|
Msg("Webhook returned non-success status")
|
|
return fmt.Errorf("webhook returned HTTP %d: %s", resp.StatusCode, responseBody)
|
|
}
|
|
}
|
|
|
|
// sendWebhook sends a webhook notification
|
|
func (n *NotificationManager) sendWebhook(webhook WebhookConfig, alert *alerts.Alert) {
|
|
var jsonData []byte
|
|
var err error
|
|
|
|
customFields := convertWebhookCustomFields(webhook.CustomFields)
|
|
data := n.prepareWebhookData(alert, customFields)
|
|
|
|
// Render URL template if placeholders are present
|
|
renderedURL, renderErr := renderWebhookURL(webhook.URL, data)
|
|
if renderErr != nil {
|
|
log.Error().
|
|
Err(renderErr).
|
|
Str("webhook", webhook.Name).
|
|
Msg("Failed to render webhook URL template")
|
|
return
|
|
}
|
|
webhook.URL = renderedURL
|
|
|
|
// Service-specific data enrichment
|
|
if webhook.Service == "telegram" {
|
|
chatID, chatErr := extractTelegramChatID(renderedURL)
|
|
if chatErr != nil {
|
|
log.Error().
|
|
Err(chatErr).
|
|
Str("webhook", webhook.Name).
|
|
Msg("Failed to extract Telegram chat_id - skipping webhook")
|
|
return
|
|
}
|
|
if chatID != "" {
|
|
data.ChatID = chatID
|
|
log.Debug().
|
|
Str("webhook", webhook.Name).
|
|
Str("chatID", chatID).
|
|
Msg("Extracted Telegram chat_id from rendered URL")
|
|
}
|
|
} else if webhook.Service == "pagerduty" {
|
|
if data.CustomFields == nil {
|
|
data.CustomFields = make(map[string]interface{})
|
|
}
|
|
if routingKey, ok := webhook.Headers["routing_key"]; ok {
|
|
data.CustomFields["routing_key"] = routingKey
|
|
}
|
|
}
|
|
|
|
// Check if webhook has a custom template first
|
|
// Only use custom template if it's not empty
|
|
if webhook.Template != "" && strings.TrimSpace(webhook.Template) != "" {
|
|
// Use custom template provided by user
|
|
enhanced := EnhancedWebhookConfig{
|
|
WebhookConfig: webhook,
|
|
Service: webhook.Service,
|
|
PayloadTemplate: webhook.Template,
|
|
CustomFields: customFields,
|
|
}
|
|
|
|
jsonData, err = n.generatePayloadFromTemplateWithService(enhanced.PayloadTemplate, data, webhook.Service)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("alertID", alert.ID).
|
|
Msg("Failed to generate webhook payload from custom template")
|
|
return
|
|
}
|
|
} else if webhook.Service != "" && webhook.Service != "generic" {
|
|
// Check if this webhook has a service type and use the proper template
|
|
// Convert to enhanced webhook to use template
|
|
enhanced := EnhancedWebhookConfig{
|
|
WebhookConfig: webhook,
|
|
Service: webhook.Service,
|
|
CustomFields: customFields,
|
|
}
|
|
|
|
// Get service template
|
|
templates := GetWebhookTemplates()
|
|
templateFound := false
|
|
for _, tmpl := range templates {
|
|
if tmpl.Service == webhook.Service {
|
|
enhanced.PayloadTemplate = tmpl.PayloadTemplate
|
|
templateFound = true
|
|
break
|
|
}
|
|
}
|
|
|
|
// Only use template if found, otherwise fall back to generic
|
|
if templateFound {
|
|
jsonData, err = n.generatePayloadFromTemplateWithService(enhanced.PayloadTemplate, data, webhook.Service)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("service", webhook.Service).
|
|
Str("alertID", alert.ID).
|
|
Msg("Failed to generate webhook payload")
|
|
return
|
|
}
|
|
} else {
|
|
// No template found, use generic payload
|
|
webhook.Service = "generic"
|
|
}
|
|
}
|
|
|
|
// Use generic payload if no service or template not found
|
|
// But ONLY if jsonData hasn't been set yet (from custom template)
|
|
if jsonData == nil && (webhook.Service == "" || webhook.Service == "generic") {
|
|
// Use generic payload for other services
|
|
payload := map[string]interface{}{
|
|
"alert": alert,
|
|
"timestamp": time.Now().Unix(),
|
|
"source": "pulse-monitoring",
|
|
}
|
|
|
|
jsonData, err = json.Marshal(payload)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("alertID", alert.ID).
|
|
Msg("Failed to marshal webhook payload")
|
|
return
|
|
}
|
|
}
|
|
|
|
// Send using common request logic
|
|
n.sendWebhookRequest(webhook, jsonData, fmt.Sprintf("alert-%s", alert.ID))
|
|
}
|
|
|
|
func convertWebhookCustomFields(fields map[string]string) map[string]interface{} {
|
|
if len(fields) == 0 {
|
|
return nil
|
|
}
|
|
|
|
converted := make(map[string]interface{}, len(fields))
|
|
for key, value := range fields {
|
|
converted[key] = value
|
|
}
|
|
return converted
|
|
}
|
|
|
|
func ensurePushoverCustomFieldAliases(fields map[string]interface{}) map[string]interface{} {
|
|
if fields == nil {
|
|
return nil
|
|
}
|
|
|
|
if _, ok := fields["token"]; !ok || isEmptyInterface(fields["token"]) {
|
|
if legacy, ok := fields["app_token"]; ok && !isEmptyInterface(legacy) {
|
|
fields["token"] = legacy
|
|
}
|
|
}
|
|
|
|
if _, ok := fields["user"]; !ok || isEmptyInterface(fields["user"]) {
|
|
if legacy, ok := fields["user_token"]; ok && !isEmptyInterface(legacy) {
|
|
fields["user"] = legacy
|
|
}
|
|
}
|
|
|
|
return fields
|
|
}
|
|
|
|
func isEmptyInterface(value interface{}) bool {
|
|
switch v := value.(type) {
|
|
case string:
|
|
return strings.TrimSpace(v) == ""
|
|
case fmt.Stringer:
|
|
return strings.TrimSpace(v.String()) == ""
|
|
case nil:
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// prepareWebhookData prepares data for template rendering
|
|
func (n *NotificationManager) prepareWebhookData(alert *alerts.Alert, customFields map[string]interface{}) WebhookPayloadData {
|
|
duration := time.Since(alert.StartTime)
|
|
|
|
// Construct full Pulse URL if publicURL is configured
|
|
// The Instance field should contain the full URL to the Pulse dashboard
|
|
instance := ""
|
|
if n.publicURL != "" {
|
|
// Remove trailing slash from publicURL if present
|
|
instance = strings.TrimRight(n.publicURL, "/")
|
|
} else if alert.Instance != "" && (strings.HasPrefix(alert.Instance, "http://") || strings.HasPrefix(alert.Instance, "https://")) {
|
|
// If publicURL is not set but alert.Instance contains a full URL, use it
|
|
instance = alert.Instance
|
|
}
|
|
|
|
resourceType := ""
|
|
if alert.Metadata != nil {
|
|
if rt, ok := alert.Metadata["resourceType"].(string); ok {
|
|
resourceType = rt
|
|
}
|
|
}
|
|
|
|
var metadataCopy map[string]interface{}
|
|
if alert.Metadata != nil {
|
|
metadataCopy = make(map[string]interface{}, len(alert.Metadata))
|
|
for k, v := range alert.Metadata {
|
|
metadataCopy[k] = v
|
|
}
|
|
}
|
|
|
|
var ackTime string
|
|
if alert.AckTime != nil {
|
|
ackTime = alert.AckTime.Format(time.RFC3339)
|
|
}
|
|
|
|
// Round Value and Threshold to 1 decimal place for cleaner webhook payloads
|
|
roundedValue := math.Round(alert.Value*10) / 10
|
|
roundedThreshold := math.Round(alert.Threshold*10) / 10
|
|
|
|
return WebhookPayloadData{
|
|
ID: alert.ID,
|
|
Level: string(alert.Level),
|
|
Type: alert.Type,
|
|
ResourceName: alert.ResourceName,
|
|
ResourceID: alert.ResourceID,
|
|
Node: alert.Node,
|
|
NodeDisplayName: alertNodeDisplay(alert),
|
|
Instance: instance,
|
|
Message: alert.Message,
|
|
Value: roundedValue,
|
|
Threshold: roundedThreshold,
|
|
ValueFormatted: formatMetricValue(alert.Type, alert.Value),
|
|
ThresholdFormatted: formatMetricThreshold(alert.Type, alert.Threshold),
|
|
StartTime: alert.StartTime.Format(time.RFC3339),
|
|
Duration: formatWebhookDuration(duration),
|
|
Timestamp: time.Now().Format(time.RFC3339),
|
|
ResourceType: resourceType,
|
|
Acknowledged: alert.Acknowledged,
|
|
AckTime: ackTime,
|
|
AckUser: alert.AckUser,
|
|
Event: "alert",
|
|
Metadata: metadataCopy,
|
|
CustomFields: customFields,
|
|
AlertCount: 1,
|
|
}
|
|
}
|
|
|
|
// prepareResolvedWebhookData builds a WebhookPayloadData for a resolved alert,
|
|
// suitable for rendering through service-specific templates (Discord, Slack, Teams, etc.).
|
|
func (n *NotificationManager) prepareResolvedWebhookData(alert *alerts.Alert, webhook WebhookConfig, resolvedAt time.Time) WebhookPayloadData {
|
|
duration := resolvedAt.Sub(alert.StartTime)
|
|
|
|
instance := ""
|
|
if n.publicURL != "" {
|
|
instance = strings.TrimRight(n.publicURL, "/")
|
|
} else if alert.Instance != "" && (strings.HasPrefix(alert.Instance, "http://") || strings.HasPrefix(alert.Instance, "https://")) {
|
|
instance = alert.Instance
|
|
}
|
|
|
|
resourceType := ""
|
|
if alert.Metadata != nil {
|
|
if rt, ok := alert.Metadata["resourceType"].(string); ok {
|
|
resourceType = rt
|
|
}
|
|
}
|
|
|
|
var metadataCopy map[string]interface{}
|
|
if alert.Metadata != nil {
|
|
metadataCopy = make(map[string]interface{}, len(alert.Metadata))
|
|
for k, v := range alert.Metadata {
|
|
metadataCopy[k] = v
|
|
}
|
|
}
|
|
|
|
var ackTime string
|
|
if alert.AckTime != nil {
|
|
ackTime = alert.AckTime.Format(time.RFC3339)
|
|
}
|
|
|
|
roundedValue := math.Round(alert.Value*10) / 10
|
|
roundedThreshold := math.Round(alert.Threshold*10) / 10
|
|
|
|
// Build a human-readable resolved message
|
|
message := fmt.Sprintf("Resolved: %s on %s is now healthy", alert.ResourceName, alertNodeDisplay(alert))
|
|
|
|
return WebhookPayloadData{
|
|
ID: alert.ID,
|
|
Level: "resolved",
|
|
Type: alert.Type,
|
|
ResourceName: alert.ResourceName,
|
|
ResourceID: alert.ResourceID,
|
|
Node: alert.Node,
|
|
NodeDisplayName: alertNodeDisplay(alert),
|
|
Instance: instance,
|
|
Message: message,
|
|
Value: roundedValue,
|
|
Threshold: roundedThreshold,
|
|
ValueFormatted: formatMetricValue(alert.Type, alert.Value),
|
|
ThresholdFormatted: formatMetricThreshold(alert.Type, alert.Threshold),
|
|
StartTime: alert.StartTime.Format(time.RFC3339),
|
|
Duration: formatWebhookDuration(duration),
|
|
Timestamp: resolvedAt.Format(time.RFC3339),
|
|
ResourceType: resourceType,
|
|
Acknowledged: alert.Acknowledged,
|
|
AckTime: ackTime,
|
|
AckUser: alert.AckUser,
|
|
Event: "resolved",
|
|
ResolvedAt: resolvedAt.Format(time.RFC3339),
|
|
ResolvedAtISO: resolvedAt.Format(time.RFC3339),
|
|
Metadata: metadataCopy,
|
|
CustomFields: convertWebhookCustomFields(webhook.CustomFields),
|
|
AlertCount: 1,
|
|
Mention: webhook.Mention,
|
|
}
|
|
}
|
|
|
|
func templateFuncMap() template.FuncMap {
|
|
return template.FuncMap{
|
|
"title": func(s string) string {
|
|
if s == "" {
|
|
return s
|
|
}
|
|
return strings.ToUpper(s[:1]) + strings.ToLower(s[1:])
|
|
},
|
|
"jsonString": func(v interface{}) string {
|
|
encoded, err := json.Marshal(v)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if len(encoded) >= 2 && encoded[0] == '"' && encoded[len(encoded)-1] == '"' {
|
|
return string(encoded[1 : len(encoded)-1])
|
|
}
|
|
return string(encoded)
|
|
},
|
|
"upper": strings.ToUpper,
|
|
"lower": strings.ToLower,
|
|
"printf": fmt.Sprintf,
|
|
"urlquery": template.URLQueryEscaper,
|
|
"urlencode": template.URLQueryEscaper,
|
|
"urlpath": url.PathEscape,
|
|
"pathescape": func(s string) string {
|
|
return url.PathEscape(s)
|
|
},
|
|
}
|
|
}
|
|
|
|
// generatePayloadFromTemplateWithService renders the payload using Go templates with service-specific handling
|
|
func (n *NotificationManager) generatePayloadFromTemplateWithService(templateStr string, data WebhookPayloadData, service string) ([]byte, error) {
|
|
tmpl, err := template.New("webhook").Funcs(templateFuncMap()).Parse(templateStr)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid template: %w", err)
|
|
}
|
|
|
|
var buf bytes.Buffer
|
|
if err := tmpl.Execute(&buf, data); err != nil {
|
|
return nil, fmt.Errorf("template execution failed: %w", err)
|
|
}
|
|
|
|
// Skip JSON validation for services that use plain text payloads
|
|
if service == "ntfy" {
|
|
// ntfy uses plain text, not JSON
|
|
return buf.Bytes(), nil
|
|
}
|
|
|
|
// Validate that the generated payload is valid JSON for other services
|
|
var jsonCheck interface{}
|
|
if err := json.Unmarshal(buf.Bytes(), &jsonCheck); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("payload", buf.String()).
|
|
Msg("Generated webhook payload is invalid JSON")
|
|
return nil, fmt.Errorf("template produced invalid JSON: %w", err)
|
|
}
|
|
|
|
return buf.Bytes(), nil
|
|
}
|
|
|
|
// renderWebhookURL applies template rendering to webhook URLs and ensures the result is a valid URL
|
|
func renderWebhookURL(urlTemplate string, data WebhookPayloadData) (string, error) {
|
|
trimmed := strings.TrimSpace(urlTemplate)
|
|
if trimmed == "" {
|
|
return "", fmt.Errorf("webhook URL cannot be empty")
|
|
}
|
|
|
|
if !strings.Contains(trimmed, "{{") {
|
|
return trimmed, nil
|
|
}
|
|
|
|
tmpl, err := template.New("webhook_url").Funcs(templateFuncMap()).Parse(trimmed)
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid webhook URL template: %w", err)
|
|
}
|
|
|
|
var buf bytes.Buffer
|
|
if err := tmpl.Execute(&buf, data); err != nil {
|
|
return "", fmt.Errorf("webhook URL template execution failed: %w", err)
|
|
}
|
|
|
|
rendered := strings.TrimSpace(buf.String())
|
|
if rendered == "" {
|
|
return "", fmt.Errorf("webhook URL template produced empty URL")
|
|
}
|
|
|
|
parsed, err := url.Parse(rendered)
|
|
if err != nil {
|
|
return "", fmt.Errorf("webhook URL template produced invalid URL: %w", err)
|
|
}
|
|
|
|
if parsed.Scheme == "" || parsed.Host == "" {
|
|
return "", fmt.Errorf("webhook URL template produced invalid URL: missing scheme or host")
|
|
}
|
|
|
|
return parsed.String(), nil
|
|
}
|
|
|
|
// formatWebhookDuration formats a duration in a human-readable way
|
|
func formatWebhookDuration(d time.Duration) string {
|
|
if d < time.Minute {
|
|
return fmt.Sprintf("%ds", int(d.Seconds()))
|
|
} else if d < time.Hour {
|
|
return fmt.Sprintf("%dm", int(d.Minutes()))
|
|
} else if d < 24*time.Hour {
|
|
return fmt.Sprintf("%dh %dm", int(d.Hours()), int(d.Minutes())%60)
|
|
} else {
|
|
days := int(d.Hours()) / 24
|
|
hours := int(d.Hours()) % 24
|
|
return fmt.Sprintf("%dd %dh", days, hours)
|
|
}
|
|
}
|
|
|
|
// extractTelegramChatID extracts and validates the chat_id from a Telegram webhook URL
|
|
func extractTelegramChatID(webhookURL string) (string, error) {
|
|
if !strings.Contains(webhookURL, "chat_id=") {
|
|
return "", fmt.Errorf("telegram webhook URL missing chat_id parameter")
|
|
}
|
|
|
|
u, err := url.Parse(webhookURL)
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid URL format: %w", err)
|
|
}
|
|
|
|
chatID := u.Query().Get("chat_id")
|
|
if chatID == "" {
|
|
return "", fmt.Errorf("chat_id parameter is empty")
|
|
}
|
|
|
|
// Validate that chat_id is numeric (Telegram chat IDs are always numeric)
|
|
// Handle negative IDs (group chats) and positive IDs (private chats)
|
|
if strings.HasPrefix(chatID, "-") {
|
|
if !isNumeric(chatID[1:]) {
|
|
return "", fmt.Errorf("chat_id must be numeric, got: %s", chatID)
|
|
}
|
|
} else if !isNumeric(chatID) {
|
|
return "", fmt.Errorf("chat_id must be numeric, got: %s", chatID)
|
|
}
|
|
|
|
return chatID, nil
|
|
}
|
|
|
|
// isNumeric checks if a string contains only digits
|
|
func isNumeric(s string) bool {
|
|
for _, char := range s {
|
|
if char < '0' || char > '9' {
|
|
return false
|
|
}
|
|
}
|
|
return len(s) > 0
|
|
}
|
|
|
|
// ValidateWebhookURL validates that a webhook URL is safe and properly formed
|
|
func (n *NotificationManager) ValidateWebhookURL(webhookURL string) error {
|
|
if webhookURL == "" {
|
|
return fmt.Errorf("webhook URL cannot be empty")
|
|
}
|
|
|
|
u, err := url.Parse(webhookURL)
|
|
if err != nil {
|
|
return fmt.Errorf("invalid URL format: %w", err)
|
|
}
|
|
|
|
// Must be HTTP or HTTPS
|
|
if u.Scheme != "http" && u.Scheme != "https" {
|
|
return fmt.Errorf("webhook URL must use http or https protocol")
|
|
}
|
|
|
|
// Get hostname for validation
|
|
host := u.Hostname()
|
|
if host == "" {
|
|
return fmt.Errorf("webhook URL missing hostname")
|
|
}
|
|
|
|
// Block localhost and loopback addresses (SSRF protection) unless allowlisted
|
|
if host == "localhost" || host == "127.0.0.1" || host == "::1" || strings.HasPrefix(host, "127.") {
|
|
// Check if localhost is in the allowlist
|
|
localhostIP := net.ParseIP("127.0.0.1")
|
|
if !n.isIPInAllowlist(localhostIP) {
|
|
return fmt.Errorf("webhook URLs pointing to localhost are not allowed for security reasons")
|
|
}
|
|
log.Debug().
|
|
Str("host", host).
|
|
Str("url", webhookURL).
|
|
Msg("Localhost webhook URL allowed via allowlist")
|
|
}
|
|
|
|
// Block link-local addresses
|
|
if strings.HasPrefix(host, "169.254.") || strings.HasPrefix(host, "fe80:") {
|
|
return fmt.Errorf("webhook URLs pointing to link-local addresses are not allowed")
|
|
}
|
|
|
|
// Resolve hostname to IPs and check for private ranges (DNS rebinding protection)
|
|
ips, err := net.LookupIP(host)
|
|
if err != nil {
|
|
// DNS resolution failed - reject for security
|
|
return fmt.Errorf("failed to resolve webhook hostname %s: %w (DNS resolution required for security)", host, err)
|
|
}
|
|
|
|
// Check all resolved IPs for private ranges
|
|
for _, ip := range ips {
|
|
if isPrivateIP(ip) {
|
|
// Check if this private IP is in the allowlist
|
|
if n.isIPInAllowlist(ip) {
|
|
log.Debug().
|
|
Str("ip", ip.String()).
|
|
Str("url", webhookURL).
|
|
Msg("Webhook URL resolves to private IP in allowlist")
|
|
} else {
|
|
return fmt.Errorf("webhook URL resolves to private IP %s - private networks are not allowed for security (configure allowlist in System Settings)", ip.String())
|
|
}
|
|
}
|
|
}
|
|
|
|
// Block common metadata service endpoints (cloud providers)
|
|
metadataHosts := []string{
|
|
"169.254.169.254", // AWS, Azure, GCP metadata
|
|
"metadata.google.internal",
|
|
"metadata.goog",
|
|
}
|
|
for _, metadataHost := range metadataHosts {
|
|
if host == metadataHost {
|
|
return fmt.Errorf("webhook URLs pointing to cloud metadata services are not allowed")
|
|
}
|
|
}
|
|
|
|
// Ensure hostname is not just an IP address without proper DNS
|
|
// This helps prevent SSRF attacks using numeric IPs to bypass filters
|
|
if u.Scheme == "https" && isNumericIP(host) {
|
|
log.Warn().
|
|
Str("url", webhookURL).
|
|
Msg("Webhook URL uses numeric IP with HTTPS - certificate validation may fail")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// isPrivateIP checks if an IP address is in a private range
|
|
func isPrivateIP(ip net.IP) bool {
|
|
// Private IPv4 ranges
|
|
privateRanges := []string{
|
|
"10.0.0.0/8", // RFC1918
|
|
"172.16.0.0/12", // RFC1918
|
|
"192.168.0.0/16", // RFC1918
|
|
"127.0.0.0/8", // Loopback
|
|
"169.254.0.0/16", // Link-local
|
|
"::1/128", // IPv6 loopback
|
|
"fe80::/10", // IPv6 link-local
|
|
"fc00::/7", // IPv6 unique local
|
|
}
|
|
|
|
for _, cidr := range privateRanges {
|
|
_, ipnet, err := net.ParseCIDR(cidr)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
if ipnet.Contains(ip) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// isNumericIP checks if a string is a numeric IP address
|
|
func isNumericIP(host string) bool {
|
|
// Simple check: if it contains only digits, dots, and colons, it's likely an IP
|
|
for _, char := range host {
|
|
if !(char >= '0' && char <= '9') && char != '.' && char != ':' {
|
|
return false
|
|
}
|
|
}
|
|
return len(host) > 0 && (strings.Contains(host, ".") || strings.Contains(host, ":"))
|
|
}
|
|
|
|
// UpdateAllowedPrivateCIDRs parses and updates the list of allowed private CIDR ranges for webhooks
|
|
func (n *NotificationManager) UpdateAllowedPrivateCIDRs(cidrsString string) error {
|
|
n.allowedPrivateMu.Lock()
|
|
defer n.allowedPrivateMu.Unlock()
|
|
|
|
// Clear existing allowlist
|
|
n.allowedPrivateNets = nil
|
|
|
|
// Empty string means no allowlist (block all private IPs)
|
|
if cidrsString == "" {
|
|
log.Info().Msg("Webhook private IP allowlist cleared - all private IPs blocked")
|
|
return nil
|
|
}
|
|
|
|
// Parse comma-separated CIDRs
|
|
cidrs := strings.Split(cidrsString, ",")
|
|
var parsedNets []*net.IPNet
|
|
|
|
for _, cidr := range cidrs {
|
|
cidr = strings.TrimSpace(cidr)
|
|
if cidr == "" {
|
|
continue
|
|
}
|
|
|
|
// Support bare IPs by adding /32 or /128
|
|
if !strings.Contains(cidr, "/") {
|
|
ip := net.ParseIP(cidr)
|
|
if ip == nil {
|
|
return fmt.Errorf("invalid IP address: %s", cidr)
|
|
}
|
|
if ip.To4() != nil {
|
|
cidr = cidr + "/32"
|
|
} else {
|
|
cidr = cidr + "/128"
|
|
}
|
|
}
|
|
|
|
_, ipNet, err := net.ParseCIDR(cidr)
|
|
if err != nil {
|
|
return fmt.Errorf("invalid CIDR range %s: %w", cidr, err)
|
|
}
|
|
|
|
parsedNets = append(parsedNets, ipNet)
|
|
}
|
|
|
|
n.allowedPrivateNets = parsedNets
|
|
log.Info().
|
|
Str("cidrs", cidrsString).
|
|
Int("count", len(parsedNets)).
|
|
Msg("Webhook private IP allowlist updated")
|
|
|
|
return nil
|
|
}
|
|
|
|
// isIPInAllowlist checks if an IP is in the configured allowlist
|
|
func (n *NotificationManager) isIPInAllowlist(ip net.IP) bool {
|
|
n.allowedPrivateMu.RLock()
|
|
defer n.allowedPrivateMu.RUnlock()
|
|
|
|
// No allowlist means block all private IPs
|
|
if len(n.allowedPrivateNets) == 0 {
|
|
return false
|
|
}
|
|
|
|
// Check if IP is in any allowed range
|
|
for _, ipNet := range n.allowedPrivateNets {
|
|
if ipNet.Contains(ip) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// addWebhookDelivery adds a webhook delivery record to the history
|
|
func (n *NotificationManager) addWebhookDelivery(delivery WebhookDelivery) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
// Add to history
|
|
n.webhookHistory = append(n.webhookHistory, delivery)
|
|
|
|
// Keep only last 100 entries
|
|
if len(n.webhookHistory) > 100 {
|
|
// Remove oldest entry
|
|
n.webhookHistory = n.webhookHistory[1:]
|
|
}
|
|
}
|
|
|
|
// GetWebhookHistory returns recent webhook delivery history
|
|
func (n *NotificationManager) GetWebhookHistory() []WebhookDelivery {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
|
|
// Return a copy to avoid concurrent access issues
|
|
history := make([]WebhookDelivery, len(n.webhookHistory))
|
|
copy(history, n.webhookHistory)
|
|
return history
|
|
}
|
|
|
|
func buildNotificationTestAlert() *alerts.Alert {
|
|
return &alerts.Alert{
|
|
ID: "test-alert",
|
|
Type: "cpu",
|
|
Level: "warning",
|
|
ResourceID: "test-resource",
|
|
ResourceName: "Test Resource",
|
|
Node: "pve-node-01",
|
|
Instance: "https://192.168.1.100:8006",
|
|
Message: "This is a test alert from Pulse Monitoring to verify your notification settings are working correctly",
|
|
Value: 95.5,
|
|
Threshold: 90,
|
|
StartTime: time.Now().Add(-5 * time.Minute), // Show it's been active for 5 minutes
|
|
LastSeen: time.Now(),
|
|
Metadata: map[string]interface{}{
|
|
"resourceType": "vm",
|
|
},
|
|
}
|
|
}
|
|
|
|
// GetQueueStats returns statistics about the notification queue
|
|
func (n *NotificationManager) GetQueueStats() (map[string]int, error) {
|
|
n.mu.RLock()
|
|
queue := n.queue
|
|
n.mu.RUnlock()
|
|
|
|
if queue == nil {
|
|
return nil, fmt.Errorf("notification queue not initialized")
|
|
}
|
|
return queue.GetQueueStats()
|
|
}
|
|
|
|
// SendTestNotification sends a test notification
|
|
func (n *NotificationManager) SendTestNotification(method string) error {
|
|
testAlert := buildNotificationTestAlert()
|
|
|
|
switch method {
|
|
case "email":
|
|
log.Info().
|
|
Bool("enabled", n.emailConfig.Enabled).
|
|
Str("smtp", n.emailConfig.SMTPHost).
|
|
Int("port", n.emailConfig.SMTPPort).
|
|
Str("from", n.emailConfig.From).
|
|
Int("toCount", len(n.emailConfig.To)).
|
|
Msg("Testing email notification")
|
|
if !n.emailConfig.Enabled {
|
|
return fmt.Errorf("email notifications are not enabled")
|
|
}
|
|
n.sendEmail(testAlert)
|
|
return nil
|
|
case "webhook":
|
|
n.mu.RLock()
|
|
if len(n.webhooks) == 0 {
|
|
n.mu.RUnlock()
|
|
return fmt.Errorf("no webhooks configured")
|
|
}
|
|
// Find first enabled webhook and copy it before releasing lock
|
|
var webhookToTest *WebhookConfig
|
|
for _, webhook := range n.webhooks {
|
|
if webhook.Enabled {
|
|
// Copy webhook to avoid race condition
|
|
webhookCopy := webhook
|
|
webhookToTest = &webhookCopy
|
|
break
|
|
}
|
|
}
|
|
n.mu.RUnlock()
|
|
|
|
if webhookToTest == nil {
|
|
return fmt.Errorf("no enabled webhooks found")
|
|
}
|
|
n.sendWebhook(*webhookToTest, testAlert)
|
|
return nil
|
|
case "apprise":
|
|
n.mu.RLock()
|
|
appriseConfig := n.appriseConfig
|
|
n.mu.RUnlock()
|
|
|
|
log.Info().
|
|
Bool("enabled", appriseConfig.Enabled).
|
|
Str("mode", string(appriseConfig.Mode)).
|
|
Int("targetCount", len(appriseConfig.Targets)).
|
|
Msg("Testing Apprise notification")
|
|
|
|
if !appriseConfig.Enabled {
|
|
return fmt.Errorf("apprise notifications are not enabled")
|
|
}
|
|
|
|
// Use sendGroupedApprise with a single test alert
|
|
return n.sendGroupedApprise(appriseConfig, []*alerts.Alert{testAlert})
|
|
default:
|
|
return fmt.Errorf("unknown notification method: %s", method)
|
|
}
|
|
}
|
|
|
|
// SendTestAppriseWithConfig sends a test Apprise notification using provided config
|
|
func (n *NotificationManager) SendTestAppriseWithConfig(config AppriseConfig) error {
|
|
cfg := NormalizeAppriseConfig(config)
|
|
|
|
log.Info().
|
|
Bool("enabled", cfg.Enabled).
|
|
Str("mode", string(cfg.Mode)).
|
|
Int("targetCount", len(cfg.Targets)).
|
|
Str("serverURL", cfg.ServerURL).
|
|
Msg("Testing Apprise notification with provided config")
|
|
|
|
if !cfg.Enabled {
|
|
switch cfg.Mode {
|
|
case AppriseModeCLI:
|
|
return fmt.Errorf("apprise notifications are not enabled in the provided configuration: at least one target is required for CLI mode")
|
|
case AppriseModeHTTP:
|
|
return fmt.Errorf("apprise notifications are not enabled in the provided configuration: server URL is required for API mode")
|
|
default:
|
|
return fmt.Errorf("apprise notifications are not enabled in the provided configuration")
|
|
}
|
|
}
|
|
|
|
return n.sendGroupedApprise(cfg, []*alerts.Alert{buildNotificationTestAlert()})
|
|
}
|
|
|
|
// SendTestWebhook sends a test notification to a specific webhook
|
|
func (n *NotificationManager) SendTestWebhook(webhook WebhookConfig) error {
|
|
// Create a test alert for webhook testing with realistic values
|
|
// Use the configured publicURL if available, otherwise use a placeholder
|
|
instanceURL := n.publicURL
|
|
if instanceURL == "" {
|
|
instanceURL = "http://your-pulse-instance:7655"
|
|
}
|
|
|
|
testAlert := &alerts.Alert{
|
|
ID: "test-webhook-" + webhook.ID,
|
|
Type: "cpu",
|
|
Level: "warning",
|
|
ResourceID: "webhook-test",
|
|
ResourceName: "Test Alert",
|
|
Node: "test-node",
|
|
Instance: instanceURL, // Use the actual Pulse URL
|
|
Message: fmt.Sprintf("This is a test alert from Pulse to verify your %s webhook is working correctly", webhook.Name),
|
|
Value: 85.5,
|
|
Threshold: 80.0,
|
|
StartTime: time.Now().Add(-5 * time.Minute), // Alert started 5 minutes ago
|
|
LastSeen: time.Now(),
|
|
Metadata: map[string]interface{}{
|
|
"webhookName": webhook.Name,
|
|
"webhookURL": webhook.URL,
|
|
"testTime": time.Now().Format(time.RFC3339),
|
|
},
|
|
}
|
|
|
|
// Send the test webhook
|
|
n.sendWebhook(webhook, testAlert)
|
|
return nil
|
|
}
|
|
|
|
// SendTestNotificationWithConfig sends a test notification using provided config
|
|
func (n *NotificationManager) SendTestNotificationWithConfig(method string, config *EmailConfig, nodeInfo *TestNodeInfo) error {
|
|
// Use actual node info if provided, otherwise use defaults
|
|
nodeName := "test-node"
|
|
instanceURL := n.publicURL
|
|
if instanceURL == "" {
|
|
instanceURL = "https://proxmox.local:8006"
|
|
}
|
|
if nodeInfo != nil {
|
|
if nodeInfo.NodeName != "" {
|
|
nodeName = nodeInfo.NodeName
|
|
}
|
|
if nodeInfo.InstanceURL != "" {
|
|
instanceURL = nodeInfo.InstanceURL
|
|
}
|
|
}
|
|
|
|
testAlert := &alerts.Alert{
|
|
ID: "test-alert",
|
|
Type: "cpu",
|
|
Level: "warning",
|
|
ResourceID: "test-email-config",
|
|
ResourceName: "Email Configuration Test",
|
|
Node: nodeName,
|
|
Instance: instanceURL,
|
|
Message: "This is a test alert to verify your email notification settings are working correctly",
|
|
Value: 85.5,
|
|
Threshold: 80,
|
|
StartTime: time.Now(),
|
|
LastSeen: time.Now(),
|
|
Metadata: map[string]interface{}{
|
|
"resourceType": "test",
|
|
},
|
|
}
|
|
|
|
switch method {
|
|
case "email":
|
|
if config == nil {
|
|
return fmt.Errorf("email configuration is required")
|
|
}
|
|
|
|
log.Info().
|
|
Bool("enabled", config.Enabled).
|
|
Str("smtp", config.SMTPHost).
|
|
Int("port", config.SMTPPort).
|
|
Str("from", config.From).
|
|
Int("toCount", len(config.To)).
|
|
Strs("to", config.To).
|
|
Bool("smtpEmpty", config.SMTPHost == "").
|
|
Bool("fromEmpty", config.From == "").
|
|
Msg("Testing email notification with provided config")
|
|
|
|
if !config.Enabled {
|
|
return fmt.Errorf("email notifications are not enabled in the provided configuration")
|
|
}
|
|
|
|
if config.SMTPHost == "" || config.From == "" {
|
|
return fmt.Errorf("email configuration is incomplete: SMTP host and from address are required")
|
|
}
|
|
|
|
// Generate email using template
|
|
subject, htmlBody, textBody := EmailTemplate([]*alerts.Alert{testAlert}, true)
|
|
|
|
// Send using provided config and return any error
|
|
return n.sendHTMLEmailWithError(subject, htmlBody, textBody, *config)
|
|
|
|
default:
|
|
return fmt.Errorf("unsupported method for config-based testing: %s", method)
|
|
}
|
|
}
|
|
|
|
func normalizeQueueType(notifType string) (string, notificationEvent) {
|
|
if strings.HasSuffix(notifType, queueTypeSuffixResolved) {
|
|
return strings.TrimSuffix(notifType, queueTypeSuffixResolved), eventResolved
|
|
}
|
|
if strings.HasSuffix(notifType, queueTypeSuffixEscalation) {
|
|
return strings.TrimSuffix(notifType, queueTypeSuffixEscalation), eventEscalation
|
|
}
|
|
return notifType, eventAlert
|
|
}
|
|
|
|
func resolvedTimeFromAlerts(alerts []*alerts.Alert) time.Time {
|
|
for _, alert := range alerts {
|
|
if alert == nil || alert.Metadata == nil {
|
|
continue
|
|
}
|
|
raw, ok := alert.Metadata[metadataResolvedAt]
|
|
if !ok {
|
|
continue
|
|
}
|
|
switch ts := raw.(type) {
|
|
case string:
|
|
if parsed, err := time.Parse(time.RFC3339, ts); err == nil {
|
|
return parsed
|
|
}
|
|
case float64:
|
|
if ts > 0 {
|
|
return time.Unix(int64(ts), 0)
|
|
}
|
|
}
|
|
}
|
|
|
|
return time.Now()
|
|
}
|
|
|
|
// ProcessQueuedNotification processes a notification from the persistent queue
|
|
func (n *NotificationManager) ProcessQueuedNotification(notif *QueuedNotification) error {
|
|
baseType, event := normalizeQueueType(notif.Type)
|
|
|
|
log.Debug().
|
|
Str("notificationID", notif.ID).
|
|
Str("type", baseType).
|
|
Str("event", string(event)).
|
|
Int("alertCount", len(notif.Alerts)).
|
|
Msg("Processing queued notification")
|
|
|
|
var err error
|
|
switch baseType {
|
|
case "email":
|
|
emailConfig, resolveErr := n.resolveQueuedEmailConfig()
|
|
if resolveErr != nil {
|
|
return resolveErr
|
|
}
|
|
if event == eventResolved {
|
|
err = n.sendResolvedEmail(emailConfig, notif.Alerts, resolvedTimeFromAlerts(notif.Alerts))
|
|
} else {
|
|
err = n.sendGroupedEmail(emailConfig, notif.Alerts)
|
|
}
|
|
|
|
case "webhook":
|
|
var queuedWebhookConfig WebhookConfig
|
|
if err = json.Unmarshal(notif.Config, &queuedWebhookConfig); err != nil {
|
|
return fmt.Errorf("failed to unmarshal webhook config: %w", err)
|
|
}
|
|
webhookConfig, resolveErr := n.resolveQueuedWebhookConfig(queuedWebhookConfig)
|
|
if resolveErr != nil {
|
|
return resolveErr
|
|
}
|
|
if event == eventResolved {
|
|
err = n.sendResolvedWebhook(webhookConfig, notif.Alerts, resolvedTimeFromAlerts(notif.Alerts))
|
|
} else {
|
|
err = n.sendGroupedWebhook(webhookConfig, notif.Alerts)
|
|
}
|
|
|
|
case "apprise":
|
|
appriseConfig, resolveErr := n.resolveQueuedAppriseConfig()
|
|
if resolveErr != nil {
|
|
return resolveErr
|
|
}
|
|
if event == eventResolved {
|
|
err = n.sendResolvedApprise(appriseConfig, notif.Alerts, resolvedTimeFromAlerts(notif.Alerts))
|
|
} else {
|
|
err = n.sendGroupedApprise(appriseConfig, notif.Alerts)
|
|
}
|
|
|
|
default:
|
|
return fmt.Errorf("unknown notification type: %s", baseType)
|
|
}
|
|
|
|
// Mark cooldown after successful send for active alerts only
|
|
if err == nil && event == eventAlert {
|
|
n.mu.Lock()
|
|
now := time.Now()
|
|
for _, alert := range notif.Alerts {
|
|
n.lastNotified[alert.ID] = notificationRecord{
|
|
lastSent: now,
|
|
alertStart: alert.StartTime,
|
|
}
|
|
}
|
|
n.mu.Unlock()
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (n *NotificationManager) resolveQueuedEmailConfig() (EmailConfig, error) {
|
|
n.mu.RLock()
|
|
enabled := n.enabled
|
|
config := copyEmailConfig(n.emailConfig)
|
|
n.mu.RUnlock()
|
|
|
|
if !enabled {
|
|
return EmailConfig{}, fmt.Errorf("%w: notifications are disabled", ErrNotificationCancelled)
|
|
}
|
|
if !config.Enabled {
|
|
return EmailConfig{}, fmt.Errorf("%w: email notifications are disabled", ErrNotificationCancelled)
|
|
}
|
|
|
|
return config, nil
|
|
}
|
|
|
|
func (n *NotificationManager) resolveQueuedAppriseConfig() (AppriseConfig, error) {
|
|
n.mu.RLock()
|
|
enabled := n.enabled
|
|
config := copyAppriseConfig(n.appriseConfig)
|
|
n.mu.RUnlock()
|
|
|
|
if !enabled {
|
|
return AppriseConfig{}, fmt.Errorf("%w: notifications are disabled", ErrNotificationCancelled)
|
|
}
|
|
if !config.Enabled {
|
|
return AppriseConfig{}, fmt.Errorf("%w: Apprise notifications are disabled", ErrNotificationCancelled)
|
|
}
|
|
|
|
return config, nil
|
|
}
|
|
|
|
func (n *NotificationManager) resolveQueuedWebhookConfig(queued WebhookConfig) (WebhookConfig, error) {
|
|
n.mu.RLock()
|
|
enabled := n.enabled
|
|
webhooks := copyWebhookConfigs(n.webhooks)
|
|
n.mu.RUnlock()
|
|
|
|
if !enabled {
|
|
return WebhookConfig{}, fmt.Errorf("%w: notifications are disabled", ErrNotificationCancelled)
|
|
}
|
|
if queued.ID != "" {
|
|
for _, webhook := range webhooks {
|
|
if webhook.ID != queued.ID {
|
|
continue
|
|
}
|
|
if !webhook.Enabled {
|
|
return WebhookConfig{}, fmt.Errorf("%w: webhook %s is disabled", ErrNotificationCancelled, queued.ID)
|
|
}
|
|
return webhook, nil
|
|
}
|
|
return WebhookConfig{}, fmt.Errorf("%w: webhook %s no longer exists", ErrNotificationCancelled, queued.ID)
|
|
}
|
|
|
|
for _, webhook := range webhooks {
|
|
if webhook.Name == queued.Name && webhook.URL == queued.URL {
|
|
if !webhook.Enabled {
|
|
return WebhookConfig{}, fmt.Errorf("%w: webhook %q is disabled", ErrNotificationCancelled, queued.Name)
|
|
}
|
|
return webhook, nil
|
|
}
|
|
}
|
|
|
|
return WebhookConfig{}, fmt.Errorf("%w: webhook %q no longer exists", ErrNotificationCancelled, queued.Name)
|
|
}
|
|
|
|
// cleanupOldNotificationRecords periodically cleans up old entries from lastNotified map
|
|
func (n *NotificationManager) cleanupOldNotificationRecords() {
|
|
ticker := time.NewTicker(1 * time.Hour)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
n.mu.Lock()
|
|
now := time.Now()
|
|
cutoff := now.Add(-24 * time.Hour)
|
|
cleaned := 0
|
|
|
|
for alertID, record := range n.lastNotified {
|
|
// Remove entries older than 24 hours
|
|
if record.lastSent.Before(cutoff) {
|
|
delete(n.lastNotified, alertID)
|
|
cleaned++
|
|
}
|
|
}
|
|
|
|
if cleaned > 0 {
|
|
log.Debug().
|
|
Int("cleaned", cleaned).
|
|
Int("remaining", len(n.lastNotified)).
|
|
Msg("Cleaned up old notification cooldown records")
|
|
}
|
|
n.mu.Unlock()
|
|
case <-n.stopCleanup:
|
|
// Stop cleanup when manager is stopped
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Stop gracefully stops the notification manager
|
|
func (n *NotificationManager) Stop() {
|
|
n.mu.Lock()
|
|
|
|
// Stop cleanup goroutine
|
|
close(n.stopCleanup)
|
|
|
|
// Get queue reference before unlocking
|
|
queue := n.queue
|
|
|
|
// Unlock before stopping queue to avoid deadlock with queue workers
|
|
// that may need to acquire n.mu during ProcessQueuedNotification
|
|
n.mu.Unlock()
|
|
|
|
// Stop the notification queue if it exists
|
|
if queue != nil {
|
|
queue.Stop()
|
|
}
|
|
|
|
// Relock for remaining cleanup
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
// Cancel any pending group timer
|
|
if n.groupTimer != nil {
|
|
n.groupTimer.Stop()
|
|
n.groupTimer = nil
|
|
}
|
|
|
|
// Clear pending alerts
|
|
n.pendingAlerts = nil
|
|
|
|
log.Info().Msg("NotificationManager stopped")
|
|
}
|