mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-07 08:57:12 +00:00
3300 lines
97 KiB
Go
3300 lines
97 KiB
Go
package notifications
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/tls"
|
|
"encoding/json"
|
|
"fmt"
|
|
"html"
|
|
"io"
|
|
"math"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"os/exec"
|
|
"strings"
|
|
"sync"
|
|
"text/template"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/securityutil"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// Webhook delivery tuning constants.
const (
	// HTTP client settings.
	WebhookTimeout         = 30 * time.Second // timeout used for the shared webhook HTTP client
	WebhookMaxResponseSize = 1 << 20          // 1 MB max response size
	WebhookMaxRedirects    = 3                // maximum number of redirects to follow
	WebhookTestTimeout     = 10 * time.Second

	// Retry settings.
	WebhookInitialBackoff = time.Second
	WebhookMaxBackoff     = 30 * time.Second
	WebhookDefaultRetries = 3

	// History settings.
	WebhookHistoryMaxSize = 100

	// Rate limiting settings.
	WebhookRateLimitWindow = time.Minute // time window for rate limiting
	WebhookRateLimitMax    = 10          // max requests per window per webhook
)
|
|
|
|
// Keys shared between the notification manager and the persistent queue.
const (
	// queueTypeSuffixResolved is appended to a queue type name (for example
	// "email") to form the type used for resolved-alert notifications.
	queueTypeSuffixResolved = "_resolved"
	// metadataResolvedAt is the alert metadata key that stores the resolution timestamp.
	metadataResolvedAt = "resolvedAt"
)

// Apprise timeout bounds (seconds), applied during config normalization.
const (
	appriseMinTimeoutSecs     = 5
	appriseDefaultTimeoutSecs = 15
	appriseMaxTimeoutSecs     = 120
)
|
|
|
|
// notificationEvent identifies the kind of notification being processed:
// a newly raised alert or a resolved/cleared one.
type notificationEvent string

const (
	eventAlert    = notificationEvent("alert")
	eventResolved = notificationEvent("resolved")
)
|
|
|
|
// notificationDeliveryJob bundles the alerts for one delivery together with
// the channel configuration to deliver them with.
// NOTE(review): presumably exactly one of EmailConfig, WebhookConfig and
// AppriseConfig is set per job, selected by Type — confirm against
// buildNotificationDeliveryJobs.
type notificationDeliveryJob struct {
	Type          string            // delivery channel name
	Event         notificationEvent // eventAlert or eventResolved
	Alerts        []*alerts.Alert   // alerts covered by this job
	ResolvedAt    time.Time         // resolution time for resolved events
	EmailConfig   *EmailConfig
	WebhookConfig *WebhookConfig
	AppriseConfig *AppriseConfig
}
|
|
|
|
// createSecureWebhookClient creates an HTTP client with security controls
|
|
func (n *NotificationManager) createSecureWebhookClient(timeout time.Duration) *http.Client {
|
|
return n.createSecureWebhookClientWithTLS(timeout, false)
|
|
}
|
|
|
|
// createSecureWebhookClientWithTLS creates a secure HTTP client with optional TLS verification override.
|
|
func (n *NotificationManager) createSecureWebhookClientWithTLS(timeout time.Duration, skipTLSVerify bool) *http.Client {
|
|
// dedicated transport that pins DNS resolution to prevent rebinding
|
|
transport := &http.Transport{
|
|
// Proxy intentionally nil — outbound proxies would bypass DialContext
|
|
// SSRF checks by resolving the target on the proxy side.
|
|
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
|
|
// Extract hostname and port
|
|
host, port, err := net.SplitHostPort(addr)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parse webhook address %q: %w", addr, err)
|
|
}
|
|
|
|
// Validate IP if it's already an IP
|
|
if ip := net.ParseIP(host); ip != nil {
|
|
if isPrivateIP(ip) && !n.isIPInAllowlist(ip) {
|
|
return nil, fmt.Errorf("blocked private IP: %s", ip)
|
|
}
|
|
// It's an IP, dial directly
|
|
d := net.Dialer{Timeout: 10 * time.Second}
|
|
return d.DialContext(ctx, network, addr)
|
|
}
|
|
|
|
// Resolve hostname
|
|
ips, err := net.LookupIP(host)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("resolve webhook host %q: %w", host, err)
|
|
}
|
|
|
|
// Find first permitted IP
|
|
var permittedIP net.IP
|
|
for _, ip := range ips {
|
|
if !isPrivateIP(ip) || n.isIPInAllowlist(ip) {
|
|
permittedIP = ip
|
|
break
|
|
}
|
|
}
|
|
|
|
if permittedIP == nil {
|
|
return nil, fmt.Errorf("hostname %s resolves to blocked private IPs", host)
|
|
}
|
|
|
|
// Log if we filtered some IPs
|
|
if len(ips) > 1 {
|
|
log.Debug().
|
|
Str("host", host).
|
|
Str("selected_ip", permittedIP.String()).
|
|
Msg("dns resolution pinned for webhook security")
|
|
}
|
|
|
|
// Dial the permitted IP
|
|
d := net.Dialer{Timeout: 10 * time.Second}
|
|
return d.DialContext(ctx, network, net.JoinHostPort(permittedIP.String(), port))
|
|
},
|
|
ForceAttemptHTTP2: true,
|
|
MaxIdleConns: 100,
|
|
IdleConnTimeout: 90 * time.Second,
|
|
TLSHandshakeTimeout: 10 * time.Second,
|
|
ExpectContinueTimeout: 1 * time.Second,
|
|
}
|
|
if skipTLSVerify {
|
|
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
|
|
}
|
|
|
|
return &http.Client{
|
|
Timeout: timeout,
|
|
Transport: transport,
|
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
|
if len(via) >= WebhookMaxRedirects {
|
|
return fmt.Errorf("stopped after %d redirects", WebhookMaxRedirects)
|
|
}
|
|
// Re-validate strictly on redirect
|
|
return n.ValidateWebhookURL(req.URL.String())
|
|
},
|
|
}
|
|
}
|
|
|
|
// TestNodeInfo contains information about nodes for test notifications.
type TestNodeInfo struct {
	NodeName, InstanceURL string
}
|
|
|
|
// WebhookDelivery tracks webhook delivery attempts for debugging.
// The JSON tags fix the serialized field names; AlertIdentifier and
// ErrorMessage are omitted from the JSON output when empty.
type WebhookDelivery struct {
	WebhookName     string    `json:"webhookName"`
	WebhookURL      string    `json:"webhookUrl"`
	Service         string    `json:"service"`
	AlertIdentifier string    `json:"alertIdentifier,omitempty"`
	Timestamp       time.Time `json:"timestamp"`
	StatusCode      int       `json:"statusCode"`
	Success         bool      `json:"success"`
	ErrorMessage    string    `json:"errorMessage,omitempty"`
	RetryAttempts   int       `json:"retryAttempts"`
	PayloadSize     int       `json:"payloadSize"` // presumably bytes — TODO confirm where deliveries are recorded
}
|
|
|
|
// webhookRateLimit tracks rate limiting for webhook deliveries.
// NOTE(review): presumably lastSent anchors the current WebhookRateLimitWindow
// and sentCount counts the sends within it — confirm against the rate-limit
// check, which is not in this part of the file.
type webhookRateLimit struct {
	lastSent  time.Time
	sentCount int
}
|
|
|
|
// NotificationManager handles sending notifications.
//
// It holds the email, webhook and Apprise configuration, the cooldown and
// grouping state for pending alerts, and the optional notification queue.
// The setters and getters in this file read and write the configuration and
// grouping fields while holding mu.
type NotificationManager struct {
	mu                 sync.RWMutex
	stopOnce           sync.Once
	cleanupWG          sync.WaitGroup // incremented before the cleanup goroutine is started
	emailConfig        EmailConfig
	emailManager       *EnhancedEmailManager // Shared email manager for rate limiting
	webhooks           []WebhookConfig
	appriseConfig      AppriseConfig
	enabled            bool          // global on/off switch checked before alerts are accepted
	cooldown           time.Duration // minimum gap between notifications for the same active alert
	notifyOnResolve    bool          // whether resolved alerts produce notifications
	lastNotified       map[string]notificationRecord // keyed by alert ID
	groupWindow        time.Duration                 // delay before pending alerts are flushed as one group
	pendingAlerts      []*alerts.Alert               // alerts waiting for the group timer
	groupTimer         *time.Timer                   // nil while no grouping window is in progress
	groupByNode        bool
	publicURL          string // Full URL to access Pulse
	groupByGuest       bool
	webhookHistory     []WebhookDelivery            // Keep last 100 webhook deliveries for debugging
	webhookRateLimits  map[string]*webhookRateLimit // Track rate limits per webhook URL
	webhookRateMu      sync.Mutex                   // Separate mutex for webhook rate limiting
	webhookRateCleanup time.Time                    // Last cleanup time for webhook rate limit entries
	appriseExec        appriseExecFunc
	queue              *NotificationQueue // Persistent notification queue
	webhookClient      *http.Client       // Shared HTTP client for webhooks
	stopCleanup        chan struct{}      // Signal to stop cleanup goroutine
	cleanupDone        chan struct{}      // Signals cleanup goroutine exit during shutdown
	allowedPrivateNets []*net.IPNet       // Parsed CIDR ranges allowed for private webhook targets
	allowedPrivateMu   sync.RWMutex       // Protects allowedPrivateNets
}
|
|
|
|
// webhookHTTPResult carries the status code, headers and body of an HTTP
// response (presumably the response to a webhook request — confirm at the
// call site, which is not in this part of the file).
type webhookHTTPResult struct {
	statusCode int
	headers    http.Header
	body       string
}
|
|
|
|
// webhookRequestOptions groups per-request settings for sending a webhook.
// NOTE(review): field meanings below are read from the names only; confirm
// against the function that consumes this struct.
type webhookRequestOptions struct {
	alertType       string
	timeout         time.Duration
	userAgent       string
	responseLogging bool // presumably toggles logging of the response
	validateURL     bool // presumably toggles URL validation before sending
}
|
|
|
|
// spawnAsync runs a fire-and-forget notification delivery on its own goroutine.
// It is a package variable so tests can disable or inline the async behavior
// without relying on sleeps.
var spawnAsync = func(task func()) {
	go task()
}
|
|
|
|
// appriseExecFunc runs the executable at path with args under ctx and returns
// its output. defaultAppriseExec is the implementation installed by the
// constructor; the indirection lets the command runner be replaced.
type appriseExecFunc func(ctx context.Context, path string, args []string) ([]byte, error)
|
|
|
|
// copyEmailConfig returns a defensive copy of EmailConfig including its slices to avoid data races.
|
|
func copyEmailConfig(cfg EmailConfig) EmailConfig {
|
|
copy := cfg
|
|
if len(cfg.To) > 0 {
|
|
copy.To = append([]string(nil), cfg.To...)
|
|
}
|
|
return copy
|
|
}
|
|
|
|
// copyWebhookConfigs deep-copies webhook configurations to isolate concurrent writers from background senders.
|
|
func copyWebhookConfigs(webhooks []WebhookConfig) []WebhookConfig {
|
|
if len(webhooks) == 0 {
|
|
return nil
|
|
}
|
|
|
|
copies := make([]WebhookConfig, 0, len(webhooks))
|
|
for _, webhook := range webhooks {
|
|
clone := webhook
|
|
if len(clone.Headers) > 0 {
|
|
headers := make(map[string]string, len(webhook.Headers))
|
|
for k, v := range clone.Headers {
|
|
headers[k] = v
|
|
}
|
|
clone.Headers = headers
|
|
}
|
|
if len(clone.CustomFields) > 0 {
|
|
custom := make(map[string]string, len(clone.CustomFields))
|
|
for k, v := range clone.CustomFields {
|
|
custom[k] = v
|
|
}
|
|
clone.CustomFields = custom
|
|
}
|
|
copies = append(copies, clone)
|
|
}
|
|
|
|
return copies
|
|
}
|
|
|
|
func copyWebhookConfig(webhook WebhookConfig) WebhookConfig {
|
|
clones := copyWebhookConfigs([]WebhookConfig{webhook})
|
|
if len(clones) == 0 {
|
|
return WebhookConfig{}
|
|
}
|
|
return clones[0]
|
|
}
|
|
|
|
// NormalizeWebhookConfig canonicalizes service-specific webhook configuration fields
|
|
// at ingress and persistence boundaries. Runtime delivery paths should already
|
|
// carry canonical webhook state and must not rely on legacy alias migration here.
|
|
func NormalizeWebhookConfig(webhook WebhookConfig) WebhookConfig {
|
|
normalized := webhook
|
|
if strings.EqualFold(strings.TrimSpace(normalized.Service), "pushover") {
|
|
normalized.CustomFields = normalizePushoverWebhookCustomFields(normalized.CustomFields)
|
|
}
|
|
return normalized
|
|
}
|
|
|
|
// normalizePushoverWebhookCustomFields returns a copy of fields in which the
// legacy Pushover keys "app_token" and "user_token" are migrated to the
// canonical "token" and "user" keys.
//
// A legacy value is used only when the canonical key is blank (after trimming)
// and the trimmed legacy value is non-empty. The legacy keys are always removed.
// The input map is never modified. Returns nil when the input, or the result, is empty.
func normalizePushoverWebhookCustomFields(fields map[string]string) map[string]string {
	if len(fields) == 0 {
		return nil
	}

	result := make(map[string]string, len(fields))
	for k, v := range fields {
		result[k] = v
	}

	aliases := [...]struct{ canonical, legacy string }{
		{canonical: "token", legacy: "app_token"},
		{canonical: "user", legacy: "user_token"},
	}
	for _, alias := range aliases {
		if strings.TrimSpace(result[alias.canonical]) == "" {
			if old := strings.TrimSpace(result[alias.legacy]); old != "" {
				result[alias.canonical] = old
			}
		}
		delete(result, alias.legacy)
	}

	if len(result) == 0 {
		return nil
	}
	return result
}
|
|
|
|
func copyAppriseConfig(cfg AppriseConfig) AppriseConfig {
|
|
copy := cfg
|
|
if len(cfg.Targets) > 0 {
|
|
copy.Targets = append([]string(nil), cfg.Targets...)
|
|
}
|
|
return copy
|
|
}
|
|
|
|
// annotateResolvedMetadata stores the resolution timestamp on the alert metadata for queue persistence.
|
|
func annotateResolvedMetadata(alert *alerts.Alert, resolvedAt time.Time) {
|
|
if alert == nil {
|
|
return
|
|
}
|
|
if alert.Metadata == nil {
|
|
alert.Metadata = make(map[string]interface{})
|
|
}
|
|
alert.Metadata[metadataResolvedAt] = resolvedAt.Format(time.RFC3339)
|
|
}
|
|
|
|
// NormalizeAppriseConfig cleans and normalizes Apprise configuration values.
|
|
func NormalizeAppriseConfig(cfg AppriseConfig) AppriseConfig {
|
|
normalized := cfg
|
|
|
|
mode := strings.ToLower(strings.TrimSpace(string(normalized.Mode)))
|
|
switch mode {
|
|
case string(AppriseModeHTTP):
|
|
normalized.Mode = AppriseModeHTTP
|
|
default:
|
|
normalized.Mode = AppriseModeCLI
|
|
}
|
|
|
|
normalized.CLIPath = "apprise" // Force default binary for security
|
|
|
|
if normalized.TimeoutSeconds <= 0 {
|
|
normalized.TimeoutSeconds = appriseDefaultTimeoutSecs
|
|
} else if normalized.TimeoutSeconds > appriseMaxTimeoutSecs {
|
|
normalized.TimeoutSeconds = appriseMaxTimeoutSecs
|
|
} else if normalized.TimeoutSeconds < appriseMinTimeoutSecs {
|
|
normalized.TimeoutSeconds = appriseMinTimeoutSecs
|
|
}
|
|
|
|
cleanTargets := make([]string, 0, len(normalized.Targets))
|
|
seen := make(map[string]struct{}, len(normalized.Targets))
|
|
for _, target := range normalized.Targets {
|
|
trimmed := strings.TrimSpace(target)
|
|
if trimmed == "" {
|
|
continue
|
|
}
|
|
lower := strings.ToLower(trimmed)
|
|
if _, exists := seen[lower]; exists {
|
|
continue
|
|
}
|
|
seen[lower] = struct{}{}
|
|
cleanTargets = append(cleanTargets, trimmed)
|
|
}
|
|
normalized.Targets = cleanTargets
|
|
|
|
normalized.ServerURL = strings.TrimSpace(normalized.ServerURL)
|
|
normalized.ServerURL = strings.TrimRight(normalized.ServerURL, "/")
|
|
|
|
normalized.ConfigKey = strings.TrimSpace(normalized.ConfigKey)
|
|
|
|
normalized.APIKey = strings.TrimSpace(normalized.APIKey)
|
|
normalized.APIKeyHeader = strings.TrimSpace(normalized.APIKeyHeader)
|
|
if normalized.APIKeyHeader == "" {
|
|
normalized.APIKeyHeader = "X-API-KEY"
|
|
}
|
|
|
|
switch normalized.Mode {
|
|
case AppriseModeCLI:
|
|
if len(normalized.Targets) == 0 {
|
|
normalized.Enabled = false
|
|
}
|
|
case AppriseModeHTTP:
|
|
if normalized.ServerURL == "" {
|
|
normalized.Enabled = false
|
|
}
|
|
}
|
|
|
|
return normalized
|
|
}
|
|
|
|
// defaultAppriseExec runs path with args under ctx and returns the command's
// combined standard output and standard error.
func defaultAppriseExec(ctx context.Context, path string, args []string) ([]byte, error) {
	return exec.CommandContext(ctx, path, args...).CombinedOutput()
}
|
|
|
|
// notificationRecord remembers when a notification was last sent for an alert
// and the StartTime of the alert it was sent for.
type notificationRecord struct {
	lastSent, alertStart time.Time
}
|
|
|
|
// Alert represents an alert (interface to avoid circular dependency).
// It exposes read-only accessors for the alert's identity, classification,
// measured value and origin.
type Alert interface {
	GetID() string
	GetResourceName() string
	GetType() string
	GetLevel() string
	GetValue() float64
	GetThreshold() float64
	GetMessage() string
	GetNode() string
	GetInstance() string
	GetStartTime() time.Time
}
|
|
|
|
// EmailConfig holds email notification settings.
// Note that Password is serialized under the "password" JSON key.
type EmailConfig struct {
	Enabled   bool     `json:"enabled"`
	Provider  string   `json:"provider"` // Email provider name (Gmail, SendGrid, etc.)
	SMTPHost  string   `json:"server"`   // Changed from smtpHost to server for frontend consistency
	SMTPPort  int      `json:"port"`     // Changed from smtpPort to port for frontend consistency
	Username  string   `json:"username"`
	Password  string   `json:"password"`
	From      string   `json:"from"`
	To        []string `json:"to"`
	TLS       bool     `json:"tls"`
	StartTLS  bool     `json:"startTLS"`  // STARTTLS support
	RateLimit int      `json:"rateLimit"` // Max emails per minute (0 = default 60)
}
|
|
|
|
// WebhookConfig holds webhook settings.
type WebhookConfig struct {
	ID           string            `json:"id"` // identifier matched by UpdateWebhook and DeleteWebhook
	Name         string            `json:"name"`
	URL          string            `json:"url"`
	Method       string            `json:"method"`
	Headers      map[string]string `json:"headers"`
	Enabled      bool              `json:"enabled"`
	Service      string            `json:"service"`                // discord, slack, teams, etc.
	Template     string            `json:"template"`               // Custom payload template
	CustomFields map[string]string `json:"customFields,omitempty"` // service-specific fields; for pushover the canonical keys are "token" and "user"
	Mention      string            `json:"mention,omitempty"`      // Platform-specific mention (e.g., @everyone, @channel, <@USER_ID>)
}
|
|
|
|
// AppriseMode identifies how Pulse should deliver notifications through Apprise.
type AppriseMode string

// Supported Apprise delivery modes.
const (
	AppriseModeCLI  = AppriseMode("cli")
	AppriseModeHTTP = AppriseMode("http")
)
|
|
|
|
// defaultSMTPPort replaces an out-of-range SMTP port during email config normalization.
const defaultSMTPPort = 587

// defaultEmailRateLimit is the emails-per-minute limit applied when the
// configured limit is zero or negative.
const defaultEmailRateLimit = 60
|
|
|
|
// AppriseConfig holds Apprise notification settings.
// NormalizeAppriseConfig is the function that enforces the constraints noted below.
type AppriseConfig struct {
	Enabled        bool        `json:"enabled"`
	Mode           AppriseMode `json:"mode,omitempty"`           // "cli" or "http"; normalization maps anything else to "cli"
	Targets        []string    `json:"targets"`                  // must be non-empty for CLI mode to stay enabled
	CLIPath        string      `json:"cliPath,omitempty"`        // normalization always overwrites this with "apprise"
	TimeoutSeconds int         `json:"timeoutSeconds,omitempty"` // normalization: <= 0 becomes 15, otherwise clamped to 5..120
	ServerURL      string      `json:"serverUrl,omitempty"`      // must be non-empty for HTTP mode to stay enabled
	ConfigKey      string      `json:"configKey,omitempty"`
	APIKey         string      `json:"apiKey,omitempty"`
	APIKeyHeader   string      `json:"apiKeyHeader,omitempty"` // normalization defaults a blank value to "X-API-KEY"
	SkipTLSVerify  bool        `json:"skipTlsVerify,omitempty"`
}
|
|
|
|
func normalizeEmailConfig(cfg EmailConfig) EmailConfig {
|
|
normalized := cfg
|
|
normalized.Provider = strings.TrimSpace(normalized.Provider)
|
|
normalized.SMTPHost = strings.TrimSpace(normalized.SMTPHost)
|
|
normalized.Username = strings.TrimSpace(normalized.Username)
|
|
normalized.From = strings.TrimSpace(normalized.From)
|
|
|
|
if normalized.SMTPPort <= 0 || normalized.SMTPPort > 65535 {
|
|
log.Warn().
|
|
Int("smtpPort", normalized.SMTPPort).
|
|
Int("defaultPort", defaultSMTPPort).
|
|
Msg("Invalid SMTP port in email config, using default")
|
|
normalized.SMTPPort = defaultSMTPPort
|
|
}
|
|
|
|
if normalized.RateLimit < 0 {
|
|
log.Warn().
|
|
Int("rateLimit", normalized.RateLimit).
|
|
Msg("Invalid negative email rate limit, using default behavior")
|
|
normalized.RateLimit = 0
|
|
}
|
|
|
|
if len(normalized.To) > 0 {
|
|
cleaned := make([]string, 0, len(normalized.To))
|
|
seen := make(map[string]struct{}, len(normalized.To))
|
|
for _, recipient := range normalized.To {
|
|
trimmed := strings.TrimSpace(recipient)
|
|
if trimmed == "" {
|
|
continue
|
|
}
|
|
key := strings.ToLower(trimmed)
|
|
if _, exists := seen[key]; exists {
|
|
continue
|
|
}
|
|
seen[key] = struct{}{}
|
|
cleaned = append(cleaned, trimmed)
|
|
}
|
|
normalized.To = cleaned
|
|
}
|
|
|
|
return normalized
|
|
}
|
|
|
|
func effectiveEmailRateLimit(configured int) int {
|
|
if configured <= 0 {
|
|
return defaultEmailRateLimit
|
|
}
|
|
return configured
|
|
}
|
|
|
|
// NewNotificationManager creates a new notification manager using the global data directory.
|
|
// For multi-tenant deployments, use NewNotificationManagerWithDataDir instead.
|
|
func NewNotificationManager(publicURL string) *NotificationManager {
|
|
return NewNotificationManagerWithDataDir(publicURL, "")
|
|
}
|
|
|
|
// NewNotificationManagerWithDataDir creates a new notification manager with a custom data directory.
|
|
// This enables tenant-scoped notification queue persistence in multi-tenant deployments.
|
|
// If dataDir is empty, it uses the global data directory.
|
|
func NewNotificationManagerWithDataDir(publicURL string, dataDir string) *NotificationManager {
|
|
cleanURL := strings.TrimRight(strings.TrimSpace(publicURL), "/")
|
|
if cleanURL != "" {
|
|
log.Info().Str("publicURL", cleanURL).Msg("notification manager initialized with public URL")
|
|
} else {
|
|
log.Info().Msg("notification manager initialized without public URL - webhook links may not work")
|
|
}
|
|
|
|
// Initialize persistent queue with tenant-specific data directory
|
|
queue, err := NewNotificationQueue(dataDir)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("failed to initialize persistent notification queue, falling back to in-memory queue")
|
|
queue, err = NewInMemoryNotificationQueue()
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("failed to initialize in-memory notification queue, notifications will be unavailable")
|
|
queue = nil
|
|
}
|
|
}
|
|
|
|
nm := &NotificationManager{
|
|
enabled: true,
|
|
cooldown: 5 * time.Minute,
|
|
notifyOnResolve: true,
|
|
lastNotified: make(map[string]notificationRecord),
|
|
webhooks: []WebhookConfig{},
|
|
appriseConfig: AppriseConfig{
|
|
Enabled: false,
|
|
Mode: AppriseModeCLI,
|
|
Targets: []string{},
|
|
CLIPath: "apprise",
|
|
TimeoutSeconds: 15,
|
|
APIKeyHeader: "X-API-KEY",
|
|
},
|
|
groupWindow: 30 * time.Second,
|
|
pendingAlerts: make([]*alerts.Alert, 0),
|
|
groupByNode: true,
|
|
groupByGuest: false,
|
|
webhookHistory: make([]WebhookDelivery, 0, WebhookHistoryMaxSize),
|
|
webhookRateLimits: make(map[string]*webhookRateLimit),
|
|
publicURL: cleanURL,
|
|
appriseExec: defaultAppriseExec,
|
|
queue: queue,
|
|
stopCleanup: make(chan struct{}),
|
|
cleanupDone: make(chan struct{}),
|
|
}
|
|
|
|
// Create webhook client after NotificationManager is initialized
|
|
nm.webhookClient = nm.createSecureWebhookClient(WebhookTimeout)
|
|
|
|
// Wire up queue processor if queue is available
|
|
if queue != nil {
|
|
queue.SetProcessor(nm.ProcessQueuedNotification)
|
|
}
|
|
|
|
// Start periodic cleanup of old lastNotified entries (every 1 hour)
|
|
nm.cleanupWG.Add(1)
|
|
go nm.cleanupOldNotificationRecords()
|
|
|
|
return nm
|
|
}
|
|
|
|
// SetPublicURL updates the public URL used for webhook payloads.
|
|
func (n *NotificationManager) SetPublicURL(publicURL string) {
|
|
trimmed := strings.TrimRight(strings.TrimSpace(publicURL), "/")
|
|
if trimmed == "" {
|
|
return
|
|
}
|
|
|
|
n.mu.Lock()
|
|
if n.publicURL == trimmed {
|
|
n.mu.Unlock()
|
|
return
|
|
}
|
|
n.publicURL = trimmed
|
|
n.mu.Unlock()
|
|
|
|
log.Info().Str("publicURL", trimmed).Msg("notification manager public URL updated")
|
|
}
|
|
|
|
// GetPublicURL returns the configured public URL for notifications.
|
|
func (n *NotificationManager) GetPublicURL() string {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.publicURL
|
|
}
|
|
|
|
// SetEmailConfig updates email configuration
|
|
func (n *NotificationManager) SetEmailConfig(config EmailConfig) {
|
|
n.mu.Lock()
|
|
config = normalizeEmailConfig(copyEmailConfig(config))
|
|
n.emailConfig = config
|
|
|
|
// Recreate email manager with new config to preserve rate limiting state
|
|
rateLimit := effectiveEmailRateLimit(config.RateLimit)
|
|
providerConfig := EmailProviderConfig{
|
|
EmailConfig: config,
|
|
Provider: "",
|
|
MaxRetries: 3,
|
|
RetryDelay: 5,
|
|
RateLimit: rateLimit,
|
|
StartTLS: config.StartTLS,
|
|
SkipTLSVerify: false,
|
|
AuthRequired: config.Username != "" && config.Password != "",
|
|
}
|
|
n.emailManager = NewEnhancedEmailManager(providerConfig)
|
|
queue := n.queue
|
|
disabled := !config.Enabled
|
|
n.mu.Unlock()
|
|
|
|
if disabled && queue != nil {
|
|
if err := queue.CancelByTypes([]string{"email", "email" + queueTypeSuffixResolved}, "Email notifications disabled"); err != nil {
|
|
log.Error().Err(err).Msg("failed to cancel queued email notifications after disabling email delivery")
|
|
}
|
|
}
|
|
}
|
|
|
|
// SetAppriseConfig updates Apprise configuration.
|
|
func (n *NotificationManager) SetAppriseConfig(config AppriseConfig) {
|
|
n.mu.Lock()
|
|
n.appriseConfig = NormalizeAppriseConfig(config)
|
|
queue := n.queue
|
|
disabled := !n.appriseConfig.Enabled
|
|
n.mu.Unlock()
|
|
|
|
if disabled && queue != nil {
|
|
if err := queue.CancelByTypes([]string{"apprise", "apprise" + queueTypeSuffixResolved}, "Apprise notifications disabled"); err != nil {
|
|
log.Error().Err(err).Msg("failed to cancel queued Apprise notifications after disabling delivery")
|
|
}
|
|
}
|
|
}
|
|
|
|
// GetAppriseConfig returns a copy of the Apprise configuration.
|
|
func (n *NotificationManager) GetAppriseConfig() AppriseConfig {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return copyAppriseConfig(n.appriseConfig)
|
|
}
|
|
|
|
// SetCooldown updates the cooldown duration
|
|
func (n *NotificationManager) SetCooldown(minutes int) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
if minutes < 0 {
|
|
minutes = 0
|
|
}
|
|
n.cooldown = time.Duration(minutes) * time.Minute
|
|
log.Info().Int("minutes", minutes).Msg("updated notification cooldown")
|
|
}
|
|
|
|
// SetNotifyOnResolve toggles whether resolved alerts send notifications.
|
|
func (n *NotificationManager) SetNotifyOnResolve(enabled bool) {
|
|
n.mu.Lock()
|
|
was := n.notifyOnResolve
|
|
n.notifyOnResolve = enabled
|
|
n.mu.Unlock()
|
|
|
|
if was != enabled {
|
|
log.Info().Bool("enabled", enabled).Msg("updated resolved alert notifications")
|
|
}
|
|
}
|
|
|
|
// GetNotifyOnResolve returns whether resolved alerts trigger notifications.
|
|
func (n *NotificationManager) GetNotifyOnResolve() bool {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.notifyOnResolve
|
|
}
|
|
|
|
// SetGroupingWindow updates the grouping window duration
|
|
func (n *NotificationManager) SetGroupingWindow(seconds int) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
if seconds < 0 {
|
|
seconds = 0
|
|
}
|
|
n.groupWindow = time.Duration(seconds) * time.Second
|
|
log.Info().Int("seconds", seconds).Msg("updated notification grouping window")
|
|
}
|
|
|
|
// SetGroupingOptions updates grouping options
|
|
func (n *NotificationManager) SetGroupingOptions(byNode, byGuest bool) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
n.groupByNode = byNode
|
|
n.groupByGuest = byGuest
|
|
log.Info().Bool("byNode", byNode).Bool("byGuest", byGuest).Msg("updated notification grouping options")
|
|
}
|
|
|
|
// AddWebhook adds a webhook configuration
|
|
func (n *NotificationManager) AddWebhook(webhook WebhookConfig) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
n.webhooks = append(n.webhooks, copyWebhookConfig(webhook))
|
|
}
|
|
|
|
// UpdateWebhook updates an existing webhook
|
|
func (n *NotificationManager) UpdateWebhook(webhookID string, webhook WebhookConfig) error {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
for i, w := range n.webhooks {
|
|
if w.ID == webhookID {
|
|
n.webhooks[i] = copyWebhookConfig(webhook)
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf("webhook not found: %s", webhookID)
|
|
}
|
|
|
|
// DeleteWebhook removes a webhook
|
|
func (n *NotificationManager) DeleteWebhook(webhookID string) error {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
for i, w := range n.webhooks {
|
|
if w.ID == webhookID {
|
|
n.webhooks = append(n.webhooks[:i], n.webhooks[i+1:]...)
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf("webhook not found: %s", webhookID)
|
|
}
|
|
|
|
// GetWebhooks returns all webhook configurations
|
|
func (n *NotificationManager) GetWebhooks() []WebhookConfig {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
|
|
if len(n.webhooks) == 0 {
|
|
return []WebhookConfig{}
|
|
}
|
|
|
|
return copyWebhookConfigs(n.webhooks)
|
|
}
|
|
|
|
// GetEmailConfig returns the email configuration
|
|
func (n *NotificationManager) GetEmailConfig() EmailConfig {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return copyEmailConfig(n.emailConfig)
|
|
}
|
|
|
|
// GetQueue returns the notification queue
|
|
func (n *NotificationManager) GetQueue() *NotificationQueue {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.queue
|
|
}
|
|
|
|
// SetEnabled toggles notification delivery globally for this runtime instance.
|
|
func (n *NotificationManager) SetEnabled(enabled bool) {
|
|
var (
|
|
queue *NotificationQueue
|
|
changed bool
|
|
)
|
|
|
|
n.mu.Lock()
|
|
changed = n.enabled != enabled
|
|
n.enabled = enabled
|
|
if !enabled {
|
|
for i := range n.pendingAlerts {
|
|
n.pendingAlerts[i] = nil
|
|
}
|
|
n.pendingAlerts = n.pendingAlerts[:0]
|
|
if n.groupTimer != nil {
|
|
n.groupTimer.Stop()
|
|
n.groupTimer = nil
|
|
}
|
|
queue = n.queue
|
|
}
|
|
n.mu.Unlock()
|
|
|
|
if changed {
|
|
log.Info().Bool("enabled", enabled).Msg("updated notification manager enabled state")
|
|
}
|
|
|
|
if !enabled && queue != nil {
|
|
types := []string{
|
|
"email", "email" + queueTypeSuffixResolved,
|
|
"webhook", "webhook" + queueTypeSuffixResolved,
|
|
"apprise", "apprise" + queueTypeSuffixResolved,
|
|
}
|
|
if err := queue.CancelByTypes(types, "Notifications disabled"); err != nil {
|
|
log.Error().Err(err).Msg("failed to cancel queued notifications after disabling notification manager")
|
|
}
|
|
}
|
|
}
|
|
|
|
// IsEnabled reports whether notification delivery is currently enabled.
|
|
func (n *NotificationManager) IsEnabled() bool {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
return n.enabled
|
|
}
|
|
|
|
// SendAlert sends notifications for an alert
|
|
func (n *NotificationManager) SendAlert(alert *alerts.Alert) {
|
|
if alert == nil {
|
|
return
|
|
}
|
|
alert = alert.Clone()
|
|
if alert == nil {
|
|
return
|
|
}
|
|
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
log.Info().
|
|
Str("alertID", alert.ID).
|
|
Bool("enabled", n.enabled).
|
|
Int("webhooks", len(n.webhooks)).
|
|
Bool("emailEnabled", n.emailConfig.Enabled).
|
|
Msg("send alert called")
|
|
|
|
if !n.enabled {
|
|
log.Debug().Msg("notifications disabled, skipping")
|
|
return
|
|
}
|
|
|
|
// Check cooldown
|
|
record, exists := n.lastNotified[alert.ID]
|
|
if exists && record.alertStart.Equal(alert.StartTime) && time.Since(record.lastSent) < n.cooldown {
|
|
log.Info().
|
|
Str("alertID", alert.ID).
|
|
Str("resourceName", alert.ResourceName).
|
|
Str("type", alert.Type).
|
|
Dur("timeSince", time.Since(record.lastSent)).
|
|
Dur("cooldown", n.cooldown).
|
|
Dur("remainingCooldown", n.cooldown-time.Since(record.lastSent)).
|
|
Msg("alert notification in cooldown for active alert - notification suppressed")
|
|
return
|
|
}
|
|
|
|
log.Info().
|
|
Str("alertID", alert.ID).
|
|
Str("resourceName", alert.ResourceName).
|
|
Str("type", alert.Type).
|
|
Float64("value", alert.Value).
|
|
Float64("threshold", alert.Threshold).
|
|
Bool("inCooldown", exists).
|
|
Msg("alert passed cooldown check - adding to pending notifications")
|
|
|
|
// Add to pending alerts for grouping
|
|
n.pendingAlerts = append(n.pendingAlerts, alert)
|
|
|
|
// If this is the first alert in the group, start the timer
|
|
if n.groupTimer == nil {
|
|
n.groupTimer = time.AfterFunc(n.groupWindow, func() {
|
|
n.sendGroupedAlerts()
|
|
})
|
|
log.Debug().
|
|
Int("pendingCount", len(n.pendingAlerts)).
|
|
Dur("groupWindow", n.groupWindow).
|
|
Msg("started alert grouping timer")
|
|
}
|
|
}
|
|
|
|
func (n *NotificationManager) markAlertsNotified(alertsToSend []*alerts.Alert, sentAt time.Time) {
|
|
n.mu.Lock()
|
|
if n.lastNotified == nil {
|
|
n.lastNotified = make(map[string]notificationRecord)
|
|
}
|
|
for _, alert := range alertsToSend {
|
|
if alert == nil {
|
|
continue
|
|
}
|
|
n.lastNotified[alert.ID] = notificationRecord{
|
|
lastSent: sentAt,
|
|
alertStart: alert.StartTime,
|
|
}
|
|
}
|
|
n.mu.Unlock()
|
|
}
|
|
|
|
// SendResolvedAlert delivers notifications for a resolved alert immediately.
|
|
func (n *NotificationManager) SendResolvedAlert(resolved *alerts.ResolvedAlert) {
|
|
if resolved == nil || resolved.Alert == nil {
|
|
return
|
|
}
|
|
|
|
// Clone the alert so downstream goroutines cannot mutate shared state.
|
|
alertCopy := resolved.Alert.Clone()
|
|
if alertCopy == nil {
|
|
return
|
|
}
|
|
|
|
resolvedAt := resolved.ResolvedTime
|
|
if resolvedAt.IsZero() {
|
|
resolvedAt = time.Now()
|
|
}
|
|
annotateResolvedMetadata(alertCopy, resolvedAt)
|
|
|
|
n.mu.RLock()
|
|
enabled := n.enabled && n.notifyOnResolve
|
|
emailConfig := copyEmailConfig(n.emailConfig)
|
|
webhooks := copyWebhookConfigs(n.webhooks)
|
|
appriseConfig := copyAppriseConfig(n.appriseConfig)
|
|
queue := n.queue
|
|
n.mu.RUnlock()
|
|
|
|
if !enabled {
|
|
log.Debug().
|
|
Str("alertID", alertCopy.ID).
|
|
Msg("resolved notifications disabled, skipping")
|
|
return
|
|
}
|
|
|
|
alertsToSend := []*alerts.Alert{alertCopy}
|
|
jobs := buildNotificationDeliveryJobs(emailConfig, webhooks, appriseConfig, alertsToSend, eventResolved, resolvedAt)
|
|
|
|
if queue != nil {
|
|
n.enqueueNotificationJobs(queue, jobs)
|
|
} else {
|
|
n.dispatchNotificationJobsAsync(jobs)
|
|
}
|
|
}
|
|
|
|
// CancelAlert removes pending notifications for a resolved alert
|
|
func (n *NotificationManager) CancelAlert(alertID string) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
if len(n.pendingAlerts) == 0 {
|
|
return
|
|
}
|
|
|
|
filtered := n.pendingAlerts[:0]
|
|
removed := 0
|
|
for _, pending := range n.pendingAlerts {
|
|
if pending == nil {
|
|
continue
|
|
}
|
|
if pending.ID == alertID {
|
|
removed++
|
|
continue
|
|
}
|
|
filtered = append(filtered, pending)
|
|
}
|
|
|
|
if removed == 0 {
|
|
return
|
|
}
|
|
|
|
for i := len(filtered); i < len(n.pendingAlerts); i++ {
|
|
n.pendingAlerts[i] = nil
|
|
}
|
|
|
|
n.pendingAlerts = filtered
|
|
|
|
if len(n.pendingAlerts) == 0 && n.groupTimer != nil {
|
|
if n.groupTimer.Stop() {
|
|
log.Debug().Str("alertID", alertID).Msg("stopped grouping timer after alert cancellation")
|
|
}
|
|
n.groupTimer = nil
|
|
}
|
|
|
|
// Clean up cooldown record for resolved alert
|
|
delete(n.lastNotified, alertID)
|
|
|
|
// Cancel any queued notifications containing this alert
|
|
if n.queue != nil {
|
|
if err := n.queue.CancelByAlertIdentifiers([]string{alertID}); err != nil {
|
|
log.Error().Err(err).Str("alertID", alertID).Msg("failed to cancel queued notifications")
|
|
}
|
|
}
|
|
|
|
log.Debug().
|
|
Str("alertID", alertID).
|
|
Int("remaining", len(n.pendingAlerts)).
|
|
Msg("removed resolved alert from pending notifications and cooldown map")
|
|
}
|
|
|
|
// sendGroupedAlerts sends all pending alerts as a group
|
|
func (n *NotificationManager) sendGroupedAlerts() {
|
|
n.mu.Lock()
|
|
if len(n.pendingAlerts) == 0 {
|
|
n.mu.Unlock()
|
|
return
|
|
}
|
|
|
|
// Copy alerts to send
|
|
alertsToSend := make([]*alerts.Alert, len(n.pendingAlerts))
|
|
copy(alertsToSend, n.pendingAlerts)
|
|
|
|
// Clear pending alerts
|
|
n.pendingAlerts = n.pendingAlerts[:0]
|
|
if n.groupTimer != nil {
|
|
n.groupTimer.Stop()
|
|
}
|
|
n.groupTimer = nil
|
|
|
|
log.Info().
|
|
Int("alertCount", len(alertsToSend)).
|
|
Msg("sending grouped alert notifications")
|
|
|
|
// Snapshot configuration while holding the lock to avoid races with concurrent updates
|
|
emailConfig := copyEmailConfig(n.emailConfig)
|
|
webhooks := copyWebhookConfigs(n.webhooks)
|
|
appriseConfig := copyAppriseConfig(n.appriseConfig)
|
|
queue := n.queue
|
|
n.mu.Unlock()
|
|
|
|
jobs := buildNotificationDeliveryJobs(emailConfig, webhooks, appriseConfig, alertsToSend, eventAlert, time.Time{})
|
|
|
|
// Use persistent queue if available, otherwise send directly
|
|
if queue != nil {
|
|
if anyFailed := n.enqueueNotificationJobs(queue, jobs); anyFailed {
|
|
n.markAlertsNotified(alertsToSend, time.Now())
|
|
}
|
|
// Note: Cooldown will be marked after successful dequeue and send
|
|
} else {
|
|
n.dispatchNotificationJobsAsync(jobs)
|
|
// For direct sends, mark cooldown immediately (fire-and-forget)
|
|
n.markAlertsNotified(alertsToSend, time.Now())
|
|
}
|
|
}
|
|
|
|
func buildNotificationDeliveryJobs(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert, event notificationEvent, resolvedAt time.Time) []notificationDeliveryJob {
|
|
jobs := make([]notificationDeliveryJob, 0, 2+len(webhooks))
|
|
if emailConfig.Enabled {
|
|
emailCopy := emailConfig
|
|
jobs = append(jobs, notificationDeliveryJob{
|
|
Type: "email",
|
|
Event: event,
|
|
Alerts: alertsToSend,
|
|
ResolvedAt: resolvedAt,
|
|
EmailConfig: &emailCopy,
|
|
})
|
|
}
|
|
for _, webhook := range webhooks {
|
|
if !webhook.Enabled {
|
|
continue
|
|
}
|
|
webhookCopy := webhook
|
|
jobs = append(jobs, notificationDeliveryJob{
|
|
Type: "webhook",
|
|
Event: event,
|
|
Alerts: alertsToSend,
|
|
ResolvedAt: resolvedAt,
|
|
WebhookConfig: &webhookCopy,
|
|
})
|
|
}
|
|
if appriseConfig.Enabled {
|
|
appriseCopy := appriseConfig
|
|
jobs = append(jobs, notificationDeliveryJob{
|
|
Type: "apprise",
|
|
Event: event,
|
|
Alerts: alertsToSend,
|
|
ResolvedAt: resolvedAt,
|
|
AppriseConfig: &appriseCopy,
|
|
})
|
|
}
|
|
return jobs
|
|
}
|
|
|
|
func queueTypeForNotificationDeliveryJob(job notificationDeliveryJob) string {
|
|
queueType := job.Type
|
|
if job.Event == eventResolved {
|
|
queueType += queueTypeSuffixResolved
|
|
}
|
|
return queueType
|
|
}
|
|
|
|
func configJSONForNotificationDeliveryJob(job notificationDeliveryJob) ([]byte, error) {
|
|
switch job.Type {
|
|
case "email":
|
|
if job.EmailConfig == nil {
|
|
return nil, fmt.Errorf("missing email config")
|
|
}
|
|
return json.Marshal(*job.EmailConfig)
|
|
case "webhook":
|
|
if job.WebhookConfig == nil {
|
|
return nil, fmt.Errorf("missing webhook config")
|
|
}
|
|
return json.Marshal(*job.WebhookConfig)
|
|
case "apprise":
|
|
if job.AppriseConfig == nil {
|
|
return nil, fmt.Errorf("missing apprise config")
|
|
}
|
|
return json.Marshal(*job.AppriseConfig)
|
|
default:
|
|
return nil, fmt.Errorf("unknown notification type: %s", job.Type)
|
|
}
|
|
}
|
|
|
|
func (n *NotificationManager) enqueueNotificationJobs(queue *NotificationQueue, jobs []notificationDeliveryJob) bool {
|
|
if queue == nil {
|
|
return false
|
|
}
|
|
|
|
anyFailed := false
|
|
for _, job := range jobs {
|
|
configJSON, err := configJSONForNotificationDeliveryJob(job)
|
|
if err != nil {
|
|
n.logNotificationJobError(job, err, "failed to marshal config for queue")
|
|
continue
|
|
}
|
|
|
|
notif := &QueuedNotification{
|
|
Type: queueTypeForNotificationDeliveryJob(job),
|
|
Alerts: job.Alerts,
|
|
Config: configJSON,
|
|
MaxAttempts: 3,
|
|
}
|
|
if err := queue.Enqueue(notif); err != nil {
|
|
anyFailed = true
|
|
n.logNotificationJobError(job, err, "failed to enqueue notification - falling back to direct send")
|
|
n.dispatchNotificationJobAsync(job, "failed to send notification after queue enqueue failure")
|
|
continue
|
|
}
|
|
|
|
logger := log.Debug().
|
|
Str("type", job.Type).
|
|
Str("event", string(job.Event)).
|
|
Int("alertCount", len(job.Alerts))
|
|
if job.WebhookConfig != nil {
|
|
logger = logger.Str("webhookName", job.WebhookConfig.Name)
|
|
}
|
|
logger.Msg("enqueued notification delivery job")
|
|
}
|
|
|
|
if anyFailed && len(jobs) > 0 && jobs[0].Event == eventResolved {
|
|
log.Debug().Msg("at least one resolved notification enqueue failed; direct sends were triggered")
|
|
}
|
|
return anyFailed
|
|
}
|
|
|
|
func (n *NotificationManager) dispatchNotificationJobsAsync(jobs []notificationDeliveryJob) {
|
|
for _, job := range jobs {
|
|
if job.Type == "email" && job.EmailConfig != nil {
|
|
log.Info().
|
|
Int("alertCount", len(job.Alerts)).
|
|
Str("smtpHost", job.EmailConfig.SMTPHost).
|
|
Int("smtpPort", job.EmailConfig.SMTPPort).
|
|
Strs("recipients", job.EmailConfig.To).
|
|
Bool("hasAuth", job.EmailConfig.Username != "" && job.EmailConfig.Password != "").
|
|
Msg("email notifications enabled - dispatching delivery job")
|
|
}
|
|
n.dispatchNotificationJobAsync(job, "failed to send notification")
|
|
}
|
|
}
|
|
|
|
func (n *NotificationManager) sendNotificationsDirect(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert) {
|
|
n.dispatchNotificationJobsAsync(buildNotificationDeliveryJobs(emailConfig, webhooks, appriseConfig, alertsToSend, eventAlert, time.Time{}))
|
|
}
|
|
|
|
func (n *NotificationManager) sendResolvedNotificationsDirect(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert, resolvedAt time.Time) {
|
|
if len(alertsToSend) == 0 {
|
|
return
|
|
}
|
|
n.dispatchNotificationJobsAsync(buildNotificationDeliveryJobs(emailConfig, webhooks, appriseConfig, alertsToSend, eventResolved, resolvedAt))
|
|
}
|
|
|
|
func (n *NotificationManager) dispatchNotificationJobAsync(job notificationDeliveryJob, failureMessage string) {
|
|
spawnAsync(func() {
|
|
if err := n.deliverNotificationJob(job); err != nil {
|
|
n.logNotificationJobError(job, err, failureMessage)
|
|
}
|
|
})
|
|
}
|
|
|
|
func (n *NotificationManager) deliverNotificationJob(job notificationDeliveryJob) error {
|
|
switch job.Type {
|
|
case "email":
|
|
if job.EmailConfig == nil {
|
|
return fmt.Errorf("missing email config")
|
|
}
|
|
if job.Event == eventResolved {
|
|
return n.sendResolvedEmail(*job.EmailConfig, job.Alerts, job.ResolvedAt)
|
|
}
|
|
return n.sendGroupedEmail(*job.EmailConfig, job.Alerts)
|
|
case "webhook":
|
|
if job.WebhookConfig == nil {
|
|
return fmt.Errorf("missing webhook config")
|
|
}
|
|
if job.Event == eventResolved {
|
|
return n.sendResolvedWebhook(*job.WebhookConfig, job.Alerts, job.ResolvedAt)
|
|
}
|
|
return n.sendGroupedWebhook(*job.WebhookConfig, job.Alerts)
|
|
case "apprise":
|
|
if job.AppriseConfig == nil {
|
|
return fmt.Errorf("missing apprise config")
|
|
}
|
|
if job.Event == eventResolved {
|
|
return n.sendResolvedApprise(*job.AppriseConfig, job.Alerts, job.ResolvedAt)
|
|
}
|
|
return n.sendGroupedApprise(*job.AppriseConfig, job.Alerts)
|
|
default:
|
|
return fmt.Errorf("unknown notification type: %s", job.Type)
|
|
}
|
|
}
|
|
|
|
func (n *NotificationManager) logNotificationJobError(job notificationDeliveryJob, err error, message string) {
|
|
logger := log.Error().
|
|
Err(err).
|
|
Str("type", job.Type).
|
|
Str("event", string(job.Event))
|
|
if job.WebhookConfig != nil {
|
|
logger = logger.Str("webhookName", job.WebhookConfig.Name)
|
|
}
|
|
logger.Msg(message)
|
|
}
|
|
|
|
// sendGroupedEmail sends a grouped email notification
|
|
func (n *NotificationManager) sendGroupedEmail(config EmailConfig, alertList []*alerts.Alert) error {
|
|
|
|
// Don't check for recipients here - sendHTMLEmail handles empty recipients
|
|
// by using the From address as the recipient
|
|
|
|
// Generate email using template
|
|
subject, htmlBody, textBody := EmailTemplate(alertList, false)
|
|
|
|
// Send using HTML-aware method
|
|
return n.sendHTMLEmailWithError(subject, htmlBody, textBody, config)
|
|
}
|
|
|
|
func (n *NotificationManager) sendResolvedEmail(config EmailConfig, alertList []*alerts.Alert, resolvedAt time.Time) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
subject, htmlBody, textBody := buildResolvedNotificationContent(alertList, resolvedAt, n.publicURL)
|
|
if subject == "" && textBody == "" {
|
|
return fmt.Errorf("failed to build resolved email content")
|
|
}
|
|
|
|
return n.sendHTMLEmailWithError(subject, htmlBody, textBody, config)
|
|
}
|
|
|
|
func (n *NotificationManager) sendGroupedApprise(config AppriseConfig, alertList []*alerts.Alert) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
cfg := NormalizeAppriseConfig(config)
|
|
if !cfg.Enabled {
|
|
return fmt.Errorf("apprise not enabled")
|
|
}
|
|
|
|
title, body, notifyType := buildApprisePayload(alertList, n.publicURL)
|
|
if title == "" && body == "" {
|
|
return fmt.Errorf("failed to build apprise payload")
|
|
}
|
|
|
|
switch cfg.Mode {
|
|
case AppriseModeHTTP:
|
|
if err := n.sendAppriseViaHTTP(cfg, title, body, notifyType); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("serverUrl", cfg.ServerURL).
|
|
Msg("failed to send Apprise notification via API")
|
|
return fmt.Errorf("apprise HTTP send failed: %w", err)
|
|
}
|
|
default:
|
|
if err := n.sendAppriseViaCLI(cfg, title, body); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("cliPath", cfg.CLIPath).
|
|
Strs("targets", cfg.Targets).
|
|
Msg("failed to send Apprise notification")
|
|
return fmt.Errorf("apprise CLI send failed: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func buildApprisePayload(alertList []*alerts.Alert, publicURL string) (string, string, string) {
|
|
validAlerts := make([]*alerts.Alert, 0, len(alertList))
|
|
var primary *alerts.Alert
|
|
for _, alert := range alertList {
|
|
if alert == nil {
|
|
continue
|
|
}
|
|
if primary == nil {
|
|
primary = alert
|
|
}
|
|
validAlerts = append(validAlerts, alert)
|
|
}
|
|
|
|
if len(validAlerts) == 0 || primary == nil {
|
|
return "", "", "info"
|
|
}
|
|
|
|
title := fmt.Sprintf("Pulse alert: %s", primary.ResourceName)
|
|
if len(validAlerts) > 1 {
|
|
title = fmt.Sprintf("Pulse alerts (%d)", len(validAlerts))
|
|
}
|
|
|
|
var bodyBuilder strings.Builder
|
|
bodyBuilder.WriteString(primary.Message)
|
|
bodyBuilder.WriteString("\n\n")
|
|
|
|
for _, alert := range validAlerts {
|
|
bodyBuilder.WriteString(fmt.Sprintf("[%s] %s", strings.ToUpper(string(alert.Level)), alert.ResourceName))
|
|
bodyBuilder.WriteString(fmt.Sprintf(" — value %.2f (threshold %.2f)\n", alert.Value, alert.Threshold))
|
|
if alert.Node != "" {
|
|
bodyBuilder.WriteString(fmt.Sprintf("Node: %s\n", alertNodeDisplay(alert)))
|
|
}
|
|
if alert.Instance != "" && alert.Instance != alert.Node {
|
|
bodyBuilder.WriteString(fmt.Sprintf("Instance: %s\n", alert.Instance))
|
|
}
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
|
|
if publicURL != "" {
|
|
bodyBuilder.WriteString("Dashboard: " + publicURL + "\n")
|
|
}
|
|
|
|
return title, bodyBuilder.String(), resolveAppriseNotificationType(validAlerts)
|
|
}
|
|
|
|
func buildResolvedNotificationContent(alertList []*alerts.Alert, resolvedAt time.Time, publicURL string) (string, string, string) {
|
|
validAlerts := make([]*alerts.Alert, 0, len(alertList))
|
|
var primary *alerts.Alert
|
|
for _, alert := range alertList {
|
|
if alert == nil {
|
|
continue
|
|
}
|
|
if primary == nil {
|
|
primary = alert
|
|
}
|
|
validAlerts = append(validAlerts, alert)
|
|
}
|
|
|
|
if len(validAlerts) == 0 || primary == nil {
|
|
return "", "", ""
|
|
}
|
|
|
|
if resolvedAt.IsZero() {
|
|
resolvedAt = time.Now()
|
|
}
|
|
resolvedLabel := resolvedAt.Format(time.RFC3339)
|
|
|
|
title := fmt.Sprintf("Pulse alert resolved: %s", primary.ResourceName)
|
|
if len(validAlerts) > 1 {
|
|
title = fmt.Sprintf("Pulse alerts resolved (%d)", len(validAlerts))
|
|
}
|
|
|
|
var bodyBuilder strings.Builder
|
|
bodyBuilder.WriteString("Resolved at ")
|
|
bodyBuilder.WriteString(resolvedLabel)
|
|
bodyBuilder.WriteString("\n\n")
|
|
|
|
for _, alert := range validAlerts {
|
|
bodyBuilder.WriteString(fmt.Sprintf("[%s] %s\n", strings.ToUpper(string(alert.Level)), alert.ResourceName))
|
|
if alert.Message != "" {
|
|
bodyBuilder.WriteString(alert.Message)
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
if !alert.StartTime.IsZero() {
|
|
bodyBuilder.WriteString("Started: ")
|
|
bodyBuilder.WriteString(alert.StartTime.Format(time.RFC3339))
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
bodyBuilder.WriteString("Cleared: ")
|
|
bodyBuilder.WriteString(resolvedLabel)
|
|
bodyBuilder.WriteString("\n")
|
|
if alert.Node != "" {
|
|
bodyBuilder.WriteString("Node: ")
|
|
bodyBuilder.WriteString(alertNodeDisplay(alert))
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
if alert.Instance != "" && alert.Instance != alert.Node {
|
|
bodyBuilder.WriteString("Instance: ")
|
|
bodyBuilder.WriteString(alert.Instance)
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
if alert.Threshold != 0 || alert.Value != 0 {
|
|
bodyBuilder.WriteString(fmt.Sprintf("Last value %.2f (threshold %.2f)\n", alert.Value, alert.Threshold))
|
|
}
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
|
|
if publicURL != "" {
|
|
bodyBuilder.WriteString("Dashboard: ")
|
|
bodyBuilder.WriteString(publicURL)
|
|
bodyBuilder.WriteString("\n")
|
|
}
|
|
|
|
textBody := bodyBuilder.String()
|
|
htmlBody := "<pre style=\"font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, \\\"Liberation Mono\\\", \\\"Courier New\\\", monospace\">" +
|
|
html.EscapeString(textBody) + "</pre>"
|
|
|
|
return title, htmlBody, textBody
|
|
}
|
|
|
|
func resolveAppriseNotificationType(alertList []*alerts.Alert) string {
|
|
notifyType := "info"
|
|
for _, alert := range alertList {
|
|
if alert == nil {
|
|
continue
|
|
}
|
|
switch alert.Level {
|
|
case alerts.AlertLevelCritical:
|
|
return "failure"
|
|
case alerts.AlertLevelWarning:
|
|
notifyType = "warning"
|
|
}
|
|
}
|
|
return notifyType
|
|
}
|
|
|
|
func (n *NotificationManager) sendAppriseViaCLI(cfg AppriseConfig, title, body string) error {
|
|
if len(cfg.Targets) == 0 {
|
|
return fmt.Errorf("no Apprise targets configured for CLI delivery")
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(cfg.TimeoutSeconds)*time.Second)
|
|
defer cancel()
|
|
|
|
args := []string{"-t", title, "-b", body}
|
|
args = append(args, cfg.Targets...)
|
|
|
|
execFn := n.appriseExec
|
|
if execFn == nil {
|
|
execFn = defaultAppriseExec
|
|
}
|
|
|
|
output, err := execFn(ctx, cfg.CLIPath, args)
|
|
if err != nil {
|
|
if len(output) > 0 {
|
|
log.Debug().
|
|
Str("cliPath", cfg.CLIPath).
|
|
Strs("targets", cfg.Targets).
|
|
Str("output", string(output)).
|
|
Msg("apprise CLI output (error)")
|
|
}
|
|
return fmt.Errorf("execute apprise CLI %q: %w", cfg.CLIPath, err)
|
|
}
|
|
|
|
if len(output) > 0 {
|
|
log.Debug().
|
|
Str("cliPath", cfg.CLIPath).
|
|
Strs("targets", cfg.Targets).
|
|
Str("output", string(output)).
|
|
Msg("apprise CLI output")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (n *NotificationManager) validatedWebhookBaseURL(rawURL string) (*url.URL, error) {
|
|
baseURL, err := securityutil.NormalizeHTTPBaseURL(rawURL, "")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := n.ValidateWebhookURL(baseURL.String()); err != nil {
|
|
return nil, err
|
|
}
|
|
return baseURL, nil
|
|
}
|
|
|
|
func (n *NotificationManager) sendAppriseViaHTTP(cfg AppriseConfig, title, body, notifyType string) error {
|
|
if cfg.ServerURL == "" {
|
|
return fmt.Errorf("apprise server URL is not configured")
|
|
}
|
|
|
|
serverURL := cfg.ServerURL
|
|
lowerURL := strings.ToLower(serverURL)
|
|
if !strings.HasPrefix(lowerURL, "http://") && !strings.HasPrefix(lowerURL, "https://") {
|
|
return fmt.Errorf("apprise server URL must start with http or https: %s", serverURL)
|
|
}
|
|
|
|
validatedBaseURL, err := n.validatedWebhookBaseURL(serverURL)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("serverURL", serverURL).
|
|
Msg("apprise server URL validation failed - possible SSRF attempt")
|
|
return fmt.Errorf("apprise server URL validation failed: %w", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(cfg.TimeoutSeconds)*time.Second)
|
|
defer cancel()
|
|
|
|
notifyEndpoint := "/notify"
|
|
if cfg.ConfigKey != "" {
|
|
notifyEndpoint = "/notify/" + url.PathEscape(cfg.ConfigKey)
|
|
}
|
|
|
|
targetURL, err := securityutil.ResolveRelativeURL(validatedBaseURL, notifyEndpoint)
|
|
if err != nil {
|
|
return fmt.Errorf("apprise server URL validation failed: %w", err)
|
|
}
|
|
|
|
payload := map[string]any{
|
|
"body": body,
|
|
"title": title,
|
|
}
|
|
if len(cfg.Targets) > 0 {
|
|
payload["urls"] = cfg.Targets
|
|
}
|
|
if notifyType != "" {
|
|
payload["type"] = notifyType
|
|
}
|
|
|
|
payloadBytes, err := json.Marshal(payload)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal Apprise payload: %w", err)
|
|
}
|
|
|
|
req, err := securityutil.NewValidatedRequestWithContext(ctx, http.MethodPost, targetURL, bytes.NewReader(payloadBytes))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create Apprise request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Accept", "application/json")
|
|
|
|
if cfg.APIKey != "" {
|
|
if cfg.APIKeyHeader == "" {
|
|
req.Header.Set("X-API-KEY", cfg.APIKey)
|
|
} else {
|
|
req.Header.Set(cfg.APIKeyHeader, cfg.APIKey)
|
|
}
|
|
}
|
|
|
|
client := n.createSecureWebhookClientWithTLS(
|
|
time.Duration(cfg.TimeoutSeconds)*time.Second,
|
|
validatedBaseURL.Scheme == "https" && cfg.SkipTLSVerify,
|
|
)
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to reach Apprise server: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
limited := io.LimitReader(resp.Body, WebhookMaxResponseSize)
|
|
respBody, _ := io.ReadAll(limited)
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
if len(respBody) > 0 {
|
|
return fmt.Errorf("apprise server returned HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
|
|
}
|
|
return fmt.Errorf("apprise server returned HTTP %d", resp.StatusCode)
|
|
}
|
|
|
|
if len(respBody) > 0 {
|
|
log.Debug().
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("serverUrl", cfg.ServerURL).
|
|
Str("response", string(respBody)).
|
|
Msg("apprise API response")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (n *NotificationManager) sendResolvedApprise(config AppriseConfig, alertList []*alerts.Alert, resolvedAt time.Time) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
cfg := NormalizeAppriseConfig(config)
|
|
if !cfg.Enabled {
|
|
return fmt.Errorf("apprise not enabled")
|
|
}
|
|
|
|
title, _, body := buildResolvedNotificationContent(alertList, resolvedAt, n.publicURL)
|
|
if title == "" && body == "" {
|
|
return fmt.Errorf("failed to build resolved apprise payload")
|
|
}
|
|
|
|
switch cfg.Mode {
|
|
case AppriseModeHTTP:
|
|
if err := n.sendAppriseViaHTTP(cfg, title, body, "info"); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("serverUrl", cfg.ServerURL).
|
|
Msg("failed to send resolved Apprise notification via API")
|
|
return fmt.Errorf("apprise HTTP send failed: %w", err)
|
|
}
|
|
default:
|
|
if err := n.sendAppriseViaCLI(cfg, title, body); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("mode", string(cfg.Mode)).
|
|
Str("cliPath", cfg.CLIPath).
|
|
Strs("targets", cfg.Targets).
|
|
Msg("failed to send resolved Apprise notification")
|
|
return fmt.Errorf("apprise CLI send failed: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// sendEmail sends an email notification
|
|
func (n *NotificationManager) sendSingleEmailWithError(alert *alerts.Alert, config EmailConfig) error {
|
|
subject, htmlBody, textBody := EmailTemplate([]*alerts.Alert{alert}, true)
|
|
return n.sendHTMLEmailWithError(subject, htmlBody, textBody, config)
|
|
}
|
|
|
|
func (n *NotificationManager) sendEmail(alert *alerts.Alert) {
|
|
n.mu.RLock()
|
|
config := n.emailConfig
|
|
n.mu.RUnlock()
|
|
|
|
if err := n.sendSingleEmailWithError(alert, config); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("alertID", alert.ID).
|
|
Msg("failed to send alert email notification")
|
|
}
|
|
}
|
|
|
|
func effectiveEmailRecipients(config EmailConfig) []string {
|
|
recipients := config.To
|
|
if len(recipients) == 0 && config.From != "" {
|
|
recipients = []string{config.From}
|
|
}
|
|
return recipients
|
|
}
|
|
|
|
func (n *NotificationManager) emailDeliveryManager(config EmailConfig) (*EnhancedEmailManager, []string) {
|
|
recipients := effectiveEmailRecipients(config)
|
|
|
|
n.mu.RLock()
|
|
manager := n.emailManager
|
|
n.mu.RUnlock()
|
|
|
|
if manager == nil {
|
|
rl := effectiveEmailRateLimit(config.RateLimit)
|
|
manager = NewEnhancedEmailManager(EmailProviderConfig{
|
|
EmailConfig: EmailConfig{
|
|
From: config.From,
|
|
To: recipients,
|
|
SMTPHost: config.SMTPHost,
|
|
SMTPPort: config.SMTPPort,
|
|
Username: config.Username,
|
|
Password: config.Password,
|
|
TLS: config.TLS,
|
|
StartTLS: config.StartTLS,
|
|
},
|
|
Provider: config.Provider,
|
|
StartTLS: config.StartTLS,
|
|
MaxRetries: 2,
|
|
RetryDelay: 3,
|
|
RateLimit: rl,
|
|
SkipTLSVerify: false,
|
|
AuthRequired: config.Username != "" && config.Password != "",
|
|
})
|
|
return manager, recipients
|
|
}
|
|
|
|
manager.config.EmailConfig = EmailConfig{
|
|
From: config.From,
|
|
To: recipients,
|
|
SMTPHost: config.SMTPHost,
|
|
SMTPPort: config.SMTPPort,
|
|
Username: config.Username,
|
|
Password: config.Password,
|
|
TLS: config.TLS,
|
|
StartTLS: config.StartTLS,
|
|
}
|
|
manager.config.Provider = config.Provider
|
|
manager.config.StartTLS = config.StartTLS
|
|
manager.config.RateLimit = effectiveEmailRateLimit(config.RateLimit)
|
|
manager.config.AuthRequired = config.Username != "" && config.Password != ""
|
|
|
|
if manager.rateLimit != nil {
|
|
manager.rateLimit.mu.Lock()
|
|
manager.rateLimit.rate = manager.config.RateLimit
|
|
manager.rateLimit.mu.Unlock()
|
|
}
|
|
|
|
return manager, recipients
|
|
}
|
|
|
|
// sendHTMLEmailWithError sends an HTML email with multipart content and returns any error
|
|
func (n *NotificationManager) sendHTMLEmailWithError(subject, htmlBody, textBody string, config EmailConfig) error {
|
|
config = normalizeEmailConfig(config)
|
|
|
|
recipients := effectiveEmailRecipients(config)
|
|
if len(config.To) == 0 && config.From != "" {
|
|
log.Info().
|
|
Str("from", config.From).
|
|
Msg("using From address as recipient since To is empty")
|
|
}
|
|
|
|
manager, recipients := n.emailDeliveryManager(config)
|
|
|
|
log.Info().
|
|
Str("smtp", fmt.Sprintf("%s:%d", config.SMTPHost, config.SMTPPort)).
|
|
Str("from", config.From).
|
|
Strs("to", recipients).
|
|
Bool("hasAuth", config.Username != "" && config.Password != "").
|
|
Bool("startTLS", manager.config.StartTLS).
|
|
Msg("attempting to send email via SMTP with enhanced support")
|
|
|
|
err := manager.SendEmailWithRetry(subject, htmlBody, textBody)
|
|
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("smtp", fmt.Sprintf("%s:%d", config.SMTPHost, config.SMTPPort)).
|
|
Strs("recipients", recipients).
|
|
Msg("failed to send email notification")
|
|
return fmt.Errorf("failed to send email: %w", err)
|
|
}
|
|
|
|
log.Info().
|
|
Strs("recipients", recipients).
|
|
Int("recipientCount", len(recipients)).
|
|
Msg("email notification sent successfully")
|
|
return nil
|
|
}
|
|
|
|
// sendHTMLEmail sends an HTML email with multipart content
|
|
func (n *NotificationManager) sendHTMLEmail(subject, htmlBody, textBody string, config EmailConfig) {
|
|
if err := n.sendHTMLEmailWithError(subject, htmlBody, textBody, config); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("smtp", fmt.Sprintf("%s:%d", config.SMTPHost, config.SMTPPort)).
|
|
Msg("failed to send HTML email notification")
|
|
}
|
|
}
|
|
|
|
// webhookRenderMode identifies which kind of webhook payload is being
// rendered. Service template lookup uses it to choose between a template's
// regular and resolved payload.
type webhookRenderMode string

const (
	// webhookRenderModeSingle is the mode for a single firing alert.
	webhookRenderModeSingle webhookRenderMode = "single"
	// webhookRenderModeGrouped is the mode for a batch of firing alerts.
	webhookRenderModeGrouped webhookRenderMode = "grouped"
	// webhookRenderModeResolved is the mode for resolved alerts.
	webhookRenderModeResolved webhookRenderMode = "resolved"
)
|
|
|
|
func (n *NotificationManager) prepareWebhookDeliveryContext(webhook WebhookConfig, data WebhookPayloadData) (WebhookConfig, WebhookPayloadData, error) {
|
|
renderedURL, renderErr := renderWebhookURL(webhook.URL, data)
|
|
if renderErr != nil {
|
|
return webhook, data, fmt.Errorf("render webhook URL template: %w", renderErr)
|
|
}
|
|
webhook.URL = renderedURL
|
|
|
|
switch webhook.Service {
|
|
case "telegram":
|
|
chatID, chatErr := extractTelegramChatID(renderedURL)
|
|
if chatErr != nil {
|
|
return webhook, data, fmt.Errorf("extract Telegram chat_id: %w", chatErr)
|
|
}
|
|
if chatID != "" {
|
|
data.ChatID = chatID
|
|
}
|
|
case "pagerduty":
|
|
if data.CustomFields == nil {
|
|
data.CustomFields = make(map[string]interface{})
|
|
}
|
|
if routingKey, ok := webhook.Headers["routing_key"]; ok {
|
|
data.CustomFields["routing_key"] = routingKey
|
|
}
|
|
}
|
|
|
|
return webhook, data, nil
|
|
}
|
|
|
|
func webhookServiceTemplate(webhook WebhookConfig, mode webhookRenderMode) (string, bool) {
|
|
if webhook.Service == "" || webhook.Service == "generic" {
|
|
return "", false
|
|
}
|
|
|
|
for _, tmpl := range GetWebhookTemplates() {
|
|
if tmpl.Service != webhook.Service {
|
|
continue
|
|
}
|
|
switch mode {
|
|
case webhookRenderModeResolved:
|
|
if tmpl.ResolvedPayloadTemplate != "" {
|
|
return tmpl.ResolvedPayloadTemplate, true
|
|
}
|
|
default:
|
|
if tmpl.PayloadTemplate != "" {
|
|
return tmpl.PayloadTemplate, true
|
|
}
|
|
}
|
|
break
|
|
}
|
|
|
|
return "", false
|
|
}
|
|
|
|
func (n *NotificationManager) renderWebhookPayloadJSON(webhook WebhookConfig, data WebhookPayloadData, mode webhookRenderMode, fallback func() ([]byte, error)) ([]byte, error) {
|
|
if webhook.Template != "" && strings.TrimSpace(webhook.Template) != "" {
|
|
jsonData, err := n.generatePayloadFromTemplateWithService(webhook.Template, data, webhook.Service)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("generate webhook payload from custom template: %w", err)
|
|
}
|
|
return jsonData, nil
|
|
}
|
|
|
|
if templateStr, ok := webhookServiceTemplate(webhook, mode); ok {
|
|
jsonData, err := n.generatePayloadFromTemplateWithService(templateStr, data, webhook.Service)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("generate webhook payload for service %s: %w", webhook.Service, err)
|
|
}
|
|
return jsonData, nil
|
|
}
|
|
|
|
return fallback()
|
|
}
|
|
|
|
// sendGroupedWebhook sends a grouped webhook notification
|
|
func (n *NotificationManager) sendGroupedWebhook(webhook WebhookConfig, alertList []*alerts.Alert) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
// Create a shallow copy of the primary alert to avoid mutating the original memory
|
|
// when we modify the message for grouped summaries.
|
|
originalPrimary := alertList[0]
|
|
alertCopy := *originalPrimary
|
|
primaryAlert := &alertCopy
|
|
customFields := convertWebhookCustomFields(webhook.CustomFields)
|
|
data := n.prepareWebhookData(primaryAlert, customFields)
|
|
data.AlertCount = len(alertList)
|
|
data.Alerts = alertList
|
|
data.Mention = webhook.Mention
|
|
|
|
// Check if webhook has a custom template first
|
|
// Only use custom template if it's not empty
|
|
if webhook.Template != "" && strings.TrimSpace(webhook.Template) != "" && len(alertList) > 0 {
|
|
// Use custom template with enhanced message for grouped alerts
|
|
alert := primaryAlert
|
|
if len(alertList) > 1 {
|
|
// Build a full list of all alerts
|
|
summary := alert.Message
|
|
otherAlerts := []string{}
|
|
for i := 1; i < len(alertList); i++ { // Show ALL alerts
|
|
otherAlerts = append(otherAlerts, fmt.Sprintf("• %s: %.1f%%", alertList[i].ResourceName, alertList[i].Value))
|
|
}
|
|
if len(otherAlerts) > 0 {
|
|
// For custom templates, we need to escape newlines since they're likely
|
|
// used in shell commands or other contexts that need escaping
|
|
alert.Message = fmt.Sprintf("%s\\n\\nAll %d alerts:\\n%s", summary, len(alertList), strings.Join(otherAlerts, "\\n"))
|
|
}
|
|
}
|
|
}
|
|
|
|
if webhook.Service != "" && webhook.Service != "generic" && len(alertList) > 0 {
|
|
// For service-specific webhooks, use the first alert with a note about others
|
|
// For simplicity, send the first alert with a note about others
|
|
// Most webhook services work better with single structured payloads
|
|
alert := primaryAlert
|
|
|
|
// Modify message if multiple alerts - but format differently for Discord
|
|
if len(alertList) > 1 {
|
|
summary := alert.Message
|
|
otherAlerts := []string{}
|
|
for i := 1; i < len(alertList); i++ {
|
|
otherAlerts = append(otherAlerts, fmt.Sprintf("• %s: %.1f%%", alertList[i].ResourceName, alertList[i].Value))
|
|
}
|
|
if len(otherAlerts) > 0 {
|
|
// For Discord, format as a single line list to avoid newline issues
|
|
// Discord embeds don't render \n in description anyway
|
|
if webhook.Service == "discord" {
|
|
// Use comma-separated list for Discord
|
|
alert.Message = fmt.Sprintf("%s | %d alerts: %s", summary, len(alertList), strings.Join(otherAlerts, ", "))
|
|
} else {
|
|
// For other services, escape newlines properly
|
|
alert.Message = fmt.Sprintf("%s\\n\\nAll %d alerts:\\n%s", summary, len(alertList), strings.Join(otherAlerts, "\\n"))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
var err error
|
|
webhook, data, err = n.prepareWebhookDeliveryContext(webhook, data)
|
|
if err != nil {
|
|
return fmt.Errorf("prepare grouped webhook context: %w", err)
|
|
}
|
|
|
|
jsonData, err := n.renderWebhookPayloadJSON(webhook, data, webhookRenderModeGrouped, func() ([]byte, error) {
|
|
payload := map[string]interface{}{
|
|
"alerts": alertList,
|
|
"count": len(alertList),
|
|
"timestamp": time.Now().Unix(),
|
|
"source": "pulse-monitoring",
|
|
"grouped": true,
|
|
}
|
|
return json.Marshal(payload)
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("render grouped webhook payload: %w", err)
|
|
}
|
|
|
|
// Send using same request logic
|
|
return n.sendWebhookRequest(webhook, jsonData, "grouped")
|
|
}
|
|
|
|
func (n *NotificationManager) sendResolvedWebhook(webhook WebhookConfig, alertList []*alerts.Alert, resolvedAt time.Time) error {
|
|
if len(alertList) == 0 {
|
|
return fmt.Errorf("no alerts to send")
|
|
}
|
|
|
|
if !webhook.Enabled {
|
|
return fmt.Errorf("webhook is disabled")
|
|
}
|
|
|
|
if resolvedAt.IsZero() {
|
|
resolvedAt = time.Now()
|
|
}
|
|
|
|
// ntfy needs plain-text body + headers, not JSON
|
|
if webhook.Service == "ntfy" {
|
|
return n.sendResolvedWebhookNtfy(webhook, alertList, resolvedAt)
|
|
}
|
|
|
|
// Use the first non-nil alert for template data (most common case is single-alert recovery)
|
|
var alert *alerts.Alert
|
|
for _, a := range alertList {
|
|
if a != nil {
|
|
alert = a
|
|
break
|
|
}
|
|
}
|
|
if alert == nil {
|
|
return fmt.Errorf("all alerts in resolved list are nil")
|
|
}
|
|
|
|
// Prepare template data using the same pipeline as the firing path
|
|
customFields := convertWebhookCustomFields(webhook.CustomFields)
|
|
data := n.prepareWebhookData(alert, customFields)
|
|
|
|
// Override fields for resolved context
|
|
data.Event = "resolved"
|
|
data.ResolvedAt = resolvedAt.Format(time.RFC3339)
|
|
data.ResolvedAtISO = resolvedAt.Format(time.RFC3339)
|
|
data.Duration = formatWebhookDuration(resolvedAt.Sub(alert.StartTime))
|
|
data.Message = fmt.Sprintf("%s on %s is now healthy", alert.ResourceName, alert.Node)
|
|
|
|
var err error
|
|
webhook, data, err = n.prepareWebhookDeliveryContext(webhook, data)
|
|
if err != nil {
|
|
return fmt.Errorf("prepare resolved webhook context: %w", err)
|
|
}
|
|
|
|
jsonData, err := n.renderWebhookPayloadJSON(webhook, data, webhookRenderModeResolved, func() ([]byte, error) {
|
|
payload := map[string]interface{}{
|
|
"event": string(eventResolved),
|
|
"alerts": alertList,
|
|
"count": len(alertList),
|
|
"resolvedAt": resolvedAt.Unix(),
|
|
"resolvedAtIso": resolvedAt.Format(time.RFC3339),
|
|
"source": "pulse-monitoring",
|
|
}
|
|
|
|
if n.publicURL != "" {
|
|
payload["dashboard"] = n.publicURL
|
|
}
|
|
|
|
if len(alertList) == 1 {
|
|
payload["alertIdentifier"] = alert.ID
|
|
}
|
|
return json.Marshal(payload)
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("render resolved webhook payload: %w", err)
|
|
}
|
|
|
|
return n.sendWebhookRequest(webhook, jsonData, "resolved")
|
|
}
|
|
|
|
// sendResolvedWebhookNtfy sends a resolved webhook formatted for ntfy (plain text + headers)
|
|
func (n *NotificationManager) sendResolvedWebhookNtfy(webhook WebhookConfig, alertList []*alerts.Alert, resolvedAt time.Time) error {
|
|
// Re-validate webhook URL
|
|
if err := n.ValidateWebhookURL(webhook.URL); err != nil {
|
|
return fmt.Errorf("webhook URL validation failed: %w", err)
|
|
}
|
|
|
|
if !n.checkWebhookRateLimit(webhook.URL) {
|
|
return fmt.Errorf("rate limit exceeded for webhook %s", webhook.Name)
|
|
}
|
|
|
|
// Build plain-text body
|
|
var body strings.Builder
|
|
if len(alertList) == 1 && alertList[0] != nil {
|
|
a := alertList[0]
|
|
fmt.Fprintf(&body, "Resolved: %s on %s is now healthy", a.ResourceName, a.Node)
|
|
} else {
|
|
fmt.Fprintf(&body, "%d alerts resolved at %s:\n", len(alertList), resolvedAt.Format(time.RFC822))
|
|
for _, a := range alertList {
|
|
if a != nil {
|
|
fmt.Fprintf(&body, "- %s on %s\n", a.ResourceName, a.Node)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Build title
|
|
title := "RESOLVED"
|
|
if len(alertList) == 1 && alertList[0] != nil {
|
|
title = fmt.Sprintf("RESOLVED: %s", alertList[0].ResourceName)
|
|
} else {
|
|
title = fmt.Sprintf("RESOLVED: %d alerts", len(alertList))
|
|
}
|
|
|
|
method := webhook.Method
|
|
if method == "" {
|
|
method = "POST"
|
|
}
|
|
|
|
req, err := http.NewRequest(method, webhook.URL, bytes.NewBufferString(body.String()))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create ntfy request: %w", err)
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "text/plain")
|
|
req.Header.Set("Title", title)
|
|
req.Header.Set("Priority", "default")
|
|
req.Header.Set("Tags", "white_check_mark,pulse,resolved")
|
|
req.Header.Set("User-Agent", "Pulse-Monitoring/2.0")
|
|
|
|
// Apply any custom headers from webhook config
|
|
for key, value := range webhook.Headers {
|
|
if !strings.Contains(value, "{{") {
|
|
req.Header.Set(key, value)
|
|
}
|
|
}
|
|
|
|
resp, err := n.webhookClient.Do(req)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Msg("failed to send resolved ntfy webhook")
|
|
return fmt.Errorf("failed to send ntfy webhook: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Read response with size limit
|
|
limitedReader := io.LimitReader(resp.Body, WebhookMaxResponseSize)
|
|
var respBody bytes.Buffer
|
|
if _, err := respBody.ReadFrom(limitedReader); err != nil {
|
|
log.Warn().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("service", "ntfy").
|
|
Msg("failed to read resolved ntfy webhook response body")
|
|
return fmt.Errorf("failed to read ntfy webhook response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
|
log.Info().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", "ntfy").
|
|
Str("type", "resolved").
|
|
Int("status", resp.StatusCode).
|
|
Int("alertCount", len(alertList)).
|
|
Msg("resolved ntfy webhook sent successfully")
|
|
return nil
|
|
}
|
|
|
|
log.Warn().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", "ntfy").
|
|
Int("status", resp.StatusCode).
|
|
Str("response", respBody.String()).
|
|
Msg("resolved ntfy webhook returned non-success status")
|
|
return fmt.Errorf("ntfy webhook returned HTTP %d: %s", resp.StatusCode, respBody.String())
|
|
}
|
|
|
|
// checkWebhookRateLimit checks if a webhook can be sent based on rate limits
|
|
func (n *NotificationManager) checkWebhookRateLimit(webhookURL string) bool {
|
|
n.webhookRateMu.Lock()
|
|
defer n.webhookRateMu.Unlock()
|
|
|
|
now := time.Now()
|
|
n.cleanupWebhookRateLimitsLocked(now)
|
|
limit, exists := n.webhookRateLimits[webhookURL]
|
|
|
|
if !exists {
|
|
// First time sending to this webhook
|
|
n.webhookRateLimits[webhookURL] = &webhookRateLimit{
|
|
lastSent: now,
|
|
sentCount: 1,
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Check if we're still in the rate limit window
|
|
if now.Sub(limit.lastSent) > WebhookRateLimitWindow {
|
|
// Window expired, reset counter
|
|
limit.lastSent = now
|
|
limit.sentCount = 1
|
|
return true
|
|
}
|
|
|
|
// Still in window, check if we've exceeded the limit
|
|
if limit.sentCount >= WebhookRateLimitMax {
|
|
log.Warn().
|
|
Str("webhookURL", webhookURL).
|
|
Int("sentCount", limit.sentCount).
|
|
Dur("window", WebhookRateLimitWindow).
|
|
Msg("webhook rate limit exceeded, dropping request")
|
|
return false
|
|
}
|
|
|
|
// Increment counter and allow
|
|
limit.sentCount++
|
|
return true
|
|
}
|
|
|
|
func (n *NotificationManager) cleanupWebhookRateLimitsLocked(now time.Time) {
|
|
if now.Sub(n.webhookRateCleanup) < WebhookRateLimitWindow {
|
|
return
|
|
}
|
|
|
|
cutoff := now.Add(-WebhookRateLimitWindow)
|
|
cleaned := 0
|
|
for webhookURL, limit := range n.webhookRateLimits {
|
|
if limit.lastSent.Before(cutoff) {
|
|
delete(n.webhookRateLimits, webhookURL)
|
|
cleaned++
|
|
}
|
|
}
|
|
n.webhookRateCleanup = now
|
|
|
|
if cleaned > 0 {
|
|
log.Debug().
|
|
Int("cleaned", cleaned).
|
|
Int("remaining", len(n.webhookRateLimits)).
|
|
Msg("Cleaned up stale webhook rate limit entries")
|
|
}
|
|
}
|
|
|
|
// sendWebhookRequest sends the actual webhook request
|
|
func normalizeWebhookRequestURL(webhook WebhookConfig) string {
|
|
// For Telegram webhooks, strip chat_id from URL if present.
|
|
// The chat_id belongs in the payload, not in the request URL.
|
|
if webhook.Service == "telegram" && strings.Contains(webhook.URL, "chat_id=") {
|
|
if u, err := url.Parse(webhook.URL); err == nil {
|
|
q := u.Query()
|
|
q.Del("chat_id")
|
|
u.RawQuery = q.Encode()
|
|
return u.String()
|
|
}
|
|
}
|
|
return webhook.URL
|
|
}
|
|
|
|
func (n *NotificationManager) executeWebhookRequest(webhook WebhookConfig, payload []byte, opts webhookRequestOptions) (*webhookHTTPResult, error) {
|
|
if opts.validateURL {
|
|
if err := n.ValidateWebhookURL(webhook.URL); err != nil {
|
|
return nil, fmt.Errorf("webhook URL validation failed: %w", err)
|
|
}
|
|
}
|
|
|
|
method := webhook.Method
|
|
if method == "" {
|
|
method = "POST"
|
|
}
|
|
|
|
targetURL, err := securityutil.NormalizeAbsoluteHTTPURL(normalizeWebhookRequestURL(webhook))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to normalize webhook request URL: %w", err)
|
|
}
|
|
|
|
req, err := securityutil.NewValidatedRequestWithContext(context.Background(), method, targetURL, bytes.NewBuffer(payload))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create webhook request: %w", err)
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("User-Agent", opts.userAgent)
|
|
if webhook.Service == "ntfy" {
|
|
req.Header.Set("Content-Type", "text/plain")
|
|
}
|
|
|
|
for key, value := range webhook.Headers {
|
|
if strings.Contains(value, "{{") {
|
|
continue
|
|
}
|
|
req.Header.Set(key, value)
|
|
}
|
|
|
|
client := n.webhookClient
|
|
if client == nil || (opts.timeout > 0 && opts.timeout != WebhookTimeout) {
|
|
client = n.createSecureWebhookClient(opts.timeout)
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to send webhook: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
limitedReader := io.LimitReader(resp.Body, WebhookMaxResponseSize)
|
|
var respBody bytes.Buffer
|
|
bytesRead, err := respBody.ReadFrom(limitedReader)
|
|
if err != nil {
|
|
return &webhookHTTPResult{statusCode: resp.StatusCode, headers: resp.Header.Clone()}, fmt.Errorf("failed to read webhook response: %w", err)
|
|
}
|
|
if bytesRead >= WebhookMaxResponseSize {
|
|
log.Warn().
|
|
Str("webhook", webhook.Name).
|
|
Int64("bytesRead", bytesRead).
|
|
Int("maxSize", WebhookMaxResponseSize).
|
|
Msg("webhook response exceeded size limit, truncated")
|
|
}
|
|
|
|
result := &webhookHTTPResult{
|
|
statusCode: resp.StatusCode,
|
|
headers: resp.Header.Clone(),
|
|
body: respBody.String(),
|
|
}
|
|
|
|
if opts.responseLogging || resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
log.Debug().
|
|
Str("webhook", webhook.Name).
|
|
Int("status", resp.StatusCode).
|
|
Str("response", result.body).
|
|
Msg("webhook response")
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return result, fmt.Errorf("webhook returned HTTP %d: %s", resp.StatusCode, result.body)
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func (n *NotificationManager) sendWebhookRequest(webhook WebhookConfig, jsonData []byte, alertType string) error {
|
|
// Re-validate webhook URL to prevent DNS rebinding attacks
|
|
if err := n.ValidateWebhookURL(webhook.URL); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("url", webhook.URL).
|
|
Msg("webhook URL validation failed at send time - possible DNS rebinding")
|
|
return fmt.Errorf("webhook URL validation failed: %w", err)
|
|
}
|
|
|
|
// Check rate limit before sending
|
|
if !n.checkWebhookRateLimit(webhook.URL) {
|
|
log.Warn().
|
|
Str("webhook", webhook.Name).
|
|
Str("url", webhook.URL).
|
|
Msg("Webhook request dropped due to rate limiting")
|
|
return fmt.Errorf("rate limit exceeded for webhook %s", webhook.Name)
|
|
}
|
|
|
|
result, err := n.executeWebhookRequest(webhook, jsonData, webhookRequestOptions{
|
|
alertType: alertType,
|
|
timeout: WebhookTimeout,
|
|
userAgent: "Pulse-Monitoring/2.0",
|
|
responseLogging: false,
|
|
validateURL: false,
|
|
})
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("type", alertType).
|
|
Msg("failed to send webhook")
|
|
return err
|
|
}
|
|
|
|
if result.statusCode >= 200 && result.statusCode < 300 {
|
|
log.Info().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", webhook.Service).
|
|
Str("type", alertType).
|
|
Int("status", result.statusCode).
|
|
Int("payloadSize", len(jsonData)).
|
|
Msg("webhook notification sent successfully")
|
|
|
|
// Log response body only in debug mode for successful requests
|
|
if len(result.body) > 0 {
|
|
log.Debug().
|
|
Str("webhook", webhook.Name).
|
|
Str("response", result.body).
|
|
Msg("webhook response body")
|
|
}
|
|
return nil
|
|
} else {
|
|
log.Warn().
|
|
Str("webhook", webhook.Name).
|
|
Str("service", webhook.Service).
|
|
Str("type", alertType).
|
|
Int("status", result.statusCode).
|
|
Str("response", result.body).
|
|
Msg("webhook returned non-success status")
|
|
return fmt.Errorf("webhook returned HTTP %d: %s", result.statusCode, result.body)
|
|
}
|
|
}
|
|
|
|
func (n *NotificationManager) sendSingleWebhookWithError(webhook WebhookConfig, alert *alerts.Alert) error {
|
|
customFields := convertWebhookCustomFields(webhook.CustomFields)
|
|
data := n.prepareWebhookData(alert, customFields)
|
|
|
|
var err error
|
|
webhook, data, err = n.prepareWebhookDeliveryContext(webhook, data)
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Msg("failed to prepare webhook delivery context")
|
|
return err
|
|
}
|
|
|
|
jsonData, err := n.renderWebhookPayloadJSON(webhook, data, webhookRenderModeSingle, func() ([]byte, error) {
|
|
payload := map[string]interface{}{
|
|
"alert": alert,
|
|
"timestamp": time.Now().Unix(),
|
|
"source": "pulse-monitoring",
|
|
}
|
|
return json.Marshal(payload)
|
|
})
|
|
if err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("alertID", alert.ID).
|
|
Msg("failed to render webhook payload")
|
|
return err
|
|
}
|
|
|
|
// Send using common request logic
|
|
if err := n.sendWebhookRequest(webhook, jsonData, fmt.Sprintf("alert-%s", alert.ID)); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// sendWebhook sends a webhook notification
|
|
func (n *NotificationManager) sendWebhook(webhook WebhookConfig, alert *alerts.Alert) {
|
|
if err := n.sendSingleWebhookWithError(webhook, alert); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("webhook", webhook.Name).
|
|
Str("alertID", alert.ID).
|
|
Msg("failed to send webhook notification")
|
|
}
|
|
}
|
|
|
|
// convertWebhookCustomFields copies a string-valued custom field map into a
// map with interface{} values. It returns nil when fields is nil or empty.
func convertWebhookCustomFields(fields map[string]string) map[string]interface{} {
	count := len(fields)
	if count == 0 {
		return nil
	}

	out := make(map[string]interface{}, count)
	for name := range fields {
		out[name] = fields[name]
	}
	return out
}
|
|
|
|
// isEmptyInterface reports whether value is nil, or is a string or
// fmt.Stringer whose text is empty after trimming whitespace. Values of any
// other type are never considered empty.
func isEmptyInterface(value interface{}) bool {
	if value == nil {
		return true
	}

	var text string
	switch typed := value.(type) {
	case string:
		text = typed
	case fmt.Stringer:
		text = typed.String()
	default:
		return false
	}
	return strings.TrimSpace(text) == ""
}
|
|
|
|
// prepareWebhookData prepares data for template rendering
|
|
func (n *NotificationManager) prepareWebhookData(alert *alerts.Alert, customFields map[string]interface{}) WebhookPayloadData {
|
|
duration := time.Since(alert.StartTime)
|
|
|
|
// Construct full Pulse URL if publicURL is configured
|
|
// The Instance field should contain the full URL to the Pulse dashboard
|
|
instance := ""
|
|
if n.publicURL != "" {
|
|
// Remove trailing slash from publicURL if present
|
|
instance = strings.TrimRight(n.publicURL, "/")
|
|
} else if alert.Instance != "" && (strings.HasPrefix(alert.Instance, "http://") || strings.HasPrefix(alert.Instance, "https://")) {
|
|
// If publicURL is not set but alert.Instance contains a full URL, use it
|
|
instance = alert.Instance
|
|
}
|
|
|
|
resourceType := ""
|
|
if alert.Metadata != nil {
|
|
if rt, ok := alert.Metadata["resourceType"].(string); ok {
|
|
resourceType = rt
|
|
}
|
|
}
|
|
|
|
var metadataCopy map[string]interface{}
|
|
if alert.Metadata != nil {
|
|
metadataCopy = make(map[string]interface{}, len(alert.Metadata))
|
|
for k, v := range alert.Metadata {
|
|
metadataCopy[k] = v
|
|
}
|
|
}
|
|
|
|
var ackTime string
|
|
if alert.AckTime != nil {
|
|
ackTime = alert.AckTime.Format(time.RFC3339)
|
|
}
|
|
|
|
// Round Value and Threshold to 1 decimal place for cleaner webhook payloads
|
|
roundedValue := math.Round(alert.Value*10) / 10
|
|
roundedThreshold := math.Round(alert.Threshold*10) / 10
|
|
|
|
return WebhookPayloadData{
|
|
ID: alert.ID,
|
|
Level: string(alert.Level),
|
|
Type: alert.Type,
|
|
ResourceName: alert.ResourceName,
|
|
ResourceID: alert.ResourceID,
|
|
Node: alert.Node,
|
|
NodeDisplayName: alertNodeDisplay(alert),
|
|
Instance: instance,
|
|
Message: alert.Message,
|
|
Value: roundedValue,
|
|
Threshold: roundedThreshold,
|
|
ValueFormatted: formatMetricValue(alert.Type, alert.Value),
|
|
ThresholdFormatted: formatMetricThreshold(alert.Type, alert.Threshold),
|
|
StartTime: alert.StartTime.Format(time.RFC3339),
|
|
Duration: formatWebhookDuration(duration),
|
|
Timestamp: time.Now().Format(time.RFC3339),
|
|
ResourceType: resourceType,
|
|
Acknowledged: alert.Acknowledged,
|
|
AckTime: ackTime,
|
|
AckUser: alert.AckUser,
|
|
Event: "alert",
|
|
Metadata: metadataCopy,
|
|
CustomFields: customFields,
|
|
AlertCount: 1,
|
|
}
|
|
}
|
|
|
|
// templateFuncMap returns the helper functions made available to webhook
// payload and URL templates:
//
//   - title: upper-cases the first character and lower-cases the rest
//   - jsonString: JSON-encodes a value; for strings the surrounding quotes
//     are stripped so the result can be embedded inside a quoted JSON string
//   - upper, lower, printf: strings.ToUpper, strings.ToLower, fmt.Sprintf
//   - urlquery, urlencode: query-component escaping
//   - urlpath, pathescape: path-segment escaping
func templateFuncMap() template.FuncMap {
	return template.FuncMap{
		"title": func(s string) string {
			if s == "" {
				return s
			}
			// Split on the first rune, not the first byte, so a leading
			// multi-byte UTF-8 character is not cut in half.
			runes := []rune(s)
			return strings.ToUpper(string(runes[:1])) + strings.ToLower(string(runes[1:]))
		},
		"jsonString": func(v interface{}) string {
			encoded, err := json.Marshal(v)
			if err != nil {
				return ""
			}
			if len(encoded) >= 2 && encoded[0] == '"' && encoded[len(encoded)-1] == '"' {
				return string(encoded[1 : len(encoded)-1])
			}
			return string(encoded)
		},
		"upper":     strings.ToUpper,
		"lower":     strings.ToLower,
		"printf":    fmt.Sprintf,
		"urlquery":  template.URLQueryEscaper,
		"urlencode": template.URLQueryEscaper,
		"urlpath":   url.PathEscape,
		"pathescape": func(s string) string {
			return url.PathEscape(s)
		},
	}
}
|
|
|
|
// generatePayloadFromTemplateWithService renders the payload using Go templates with service-specific handling
|
|
func (n *NotificationManager) generatePayloadFromTemplateWithService(templateStr string, data WebhookPayloadData, service string) ([]byte, error) {
|
|
tmpl, err := template.New("webhook").Funcs(templateFuncMap()).Parse(templateStr)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid template: %w", err)
|
|
}
|
|
|
|
var buf bytes.Buffer
|
|
if err := tmpl.Execute(&buf, data); err != nil {
|
|
return nil, fmt.Errorf("template execution failed: %w", err)
|
|
}
|
|
|
|
// Skip JSON validation for services that use plain text payloads
|
|
if service == "ntfy" {
|
|
// ntfy uses plain text, not JSON
|
|
return buf.Bytes(), nil
|
|
}
|
|
|
|
// Validate that the generated payload is valid JSON for other services
|
|
var jsonCheck interface{}
|
|
if err := json.Unmarshal(buf.Bytes(), &jsonCheck); err != nil {
|
|
log.Error().
|
|
Err(err).
|
|
Str("payload", buf.String()).
|
|
Msg("generated webhook payload is invalid JSON")
|
|
return nil, fmt.Errorf("template produced invalid JSON: %w", err)
|
|
}
|
|
|
|
return buf.Bytes(), nil
|
|
}
|
|
|
|
// renderWebhookURL applies template rendering to webhook URLs and ensures the result is a valid URL
|
|
func renderWebhookURL(urlTemplate string, data WebhookPayloadData) (string, error) {
|
|
trimmed := strings.TrimSpace(urlTemplate)
|
|
if trimmed == "" {
|
|
return "", fmt.Errorf("webhook URL cannot be empty")
|
|
}
|
|
|
|
if !strings.Contains(trimmed, "{{") {
|
|
return trimmed, nil
|
|
}
|
|
|
|
tmpl, err := template.New("webhook_url").Funcs(templateFuncMap()).Parse(trimmed)
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid webhook URL template: %w", err)
|
|
}
|
|
|
|
var buf bytes.Buffer
|
|
if err := tmpl.Execute(&buf, data); err != nil {
|
|
return "", fmt.Errorf("webhook URL template execution failed: %w", err)
|
|
}
|
|
|
|
rendered := strings.TrimSpace(buf.String())
|
|
if rendered == "" {
|
|
return "", fmt.Errorf("webhook URL template produced empty URL")
|
|
}
|
|
|
|
parsed, err := url.Parse(rendered)
|
|
if err != nil {
|
|
return "", fmt.Errorf("webhook URL template produced invalid URL: %w", err)
|
|
}
|
|
|
|
if parsed.Scheme == "" || parsed.Host == "" {
|
|
return "", fmt.Errorf("webhook URL template produced invalid URL: missing scheme or host")
|
|
}
|
|
|
|
return parsed.String(), nil
|
|
}
|
|
|
|
// formatWebhookDuration formats a duration in a compact, human-readable form,
// truncating to the two most significant units:
//
//	under a minute  -> "45s"
//	under an hour   -> "12m"
//	under a day     -> "3h 5m"
//	a day or more   -> "2d 1h"
//
// Negative durations (for example when the end time precedes the start time
// because of clock skew) are treated as zero and render as "0s".
func formatWebhookDuration(d time.Duration) string {
	if d < 0 {
		d = 0
	}

	switch {
	case d < time.Minute:
		return fmt.Sprintf("%ds", int(d/time.Second))
	case d < time.Hour:
		return fmt.Sprintf("%dm", int(d/time.Minute))
	case d < 24*time.Hour:
		return fmt.Sprintf("%dh %dm", int(d/time.Hour), int(d/time.Minute)%60)
	default:
		totalHours := int(d / time.Hour)
		return fmt.Sprintf("%dd %dh", totalHours/24, totalHours%24)
	}
}
|
|
|
|
// extractTelegramChatID extracts and validates the chat_id from a Telegram webhook URL
|
|
func extractTelegramChatID(webhookURL string) (string, error) {
|
|
if !strings.Contains(webhookURL, "chat_id=") {
|
|
return "", fmt.Errorf("telegram webhook URL missing chat_id parameter")
|
|
}
|
|
|
|
u, err := url.Parse(webhookURL)
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid URL format: %w", err)
|
|
}
|
|
|
|
chatID := u.Query().Get("chat_id")
|
|
if chatID == "" {
|
|
return "", fmt.Errorf("chat_id parameter is empty")
|
|
}
|
|
|
|
// Validate that chat_id is numeric (Telegram chat IDs are always numeric)
|
|
// Handle negative IDs (group chats) and positive IDs (private chats)
|
|
if strings.HasPrefix(chatID, "-") {
|
|
if !isNumeric(chatID[1:]) {
|
|
return "", fmt.Errorf("chat_id must be numeric, got: %s", chatID)
|
|
}
|
|
} else if !isNumeric(chatID) {
|
|
return "", fmt.Errorf("chat_id must be numeric, got: %s", chatID)
|
|
}
|
|
|
|
return chatID, nil
|
|
}
|
|
|
|
// isNumeric reports whether s is non-empty and consists solely of the ASCII
// digits 0-9.
func isNumeric(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		if c := s[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
|
|
|
|
// ValidateWebhookURL validates that a webhook URL is safe and properly formed
|
|
func (n *NotificationManager) ValidateWebhookURL(webhookURL string) error {
|
|
if webhookURL == "" {
|
|
return fmt.Errorf("webhook URL cannot be empty")
|
|
}
|
|
|
|
u, err := url.Parse(webhookURL)
|
|
if err != nil {
|
|
return fmt.Errorf("invalid URL format: %w", err)
|
|
}
|
|
if u.User != nil {
|
|
return fmt.Errorf("webhook URL userinfo is not allowed")
|
|
}
|
|
|
|
// Must be HTTP or HTTPS
|
|
if u.Scheme != "http" && u.Scheme != "https" {
|
|
return fmt.Errorf("webhook URL must use http or https protocol")
|
|
}
|
|
|
|
// Get hostname for validation
|
|
host := u.Hostname()
|
|
if host == "" {
|
|
return fmt.Errorf("webhook URL missing hostname")
|
|
}
|
|
|
|
// Block localhost and loopback addresses (SSRF protection) unless allowlisted
|
|
if host == "localhost" || host == "127.0.0.1" || host == "::1" || strings.HasPrefix(host, "127.") {
|
|
// Check if localhost is in the allowlist
|
|
localhostIP := net.ParseIP("127.0.0.1")
|
|
if !n.isIPInAllowlist(localhostIP) {
|
|
return fmt.Errorf("webhook URLs pointing to localhost are not allowed for security reasons")
|
|
}
|
|
log.Debug().
|
|
Str("host", host).
|
|
Str("url", webhookURL).
|
|
Msg("localhost webhook URL allowed via allowlist")
|
|
}
|
|
|
|
// Block link-local addresses
|
|
if strings.HasPrefix(host, "169.254.") || strings.HasPrefix(host, "fe80:") {
|
|
return fmt.Errorf("webhook URLs pointing to link-local addresses are not allowed")
|
|
}
|
|
|
|
// Resolve hostname to IPs and check for private ranges (DNS rebinding protection)
|
|
ips, err := net.LookupIP(host)
|
|
if err != nil {
|
|
// DNS resolution failed - reject for security
|
|
return fmt.Errorf("failed to resolve webhook hostname %s: %w (DNS resolution required for security)", host, err)
|
|
}
|
|
|
|
// Check all resolved IPs for private ranges
|
|
for _, ip := range ips {
|
|
if isPrivateIP(ip) {
|
|
// Check if this private IP is in the allowlist
|
|
if n.isIPInAllowlist(ip) {
|
|
log.Debug().
|
|
Str("ip", ip.String()).
|
|
Str("url", webhookURL).
|
|
Msg("webhook URL resolves to private IP in allowlist")
|
|
} else {
|
|
return fmt.Errorf("webhook URL resolves to private IP %s - private networks are not allowed for security (configure allowlist in System Settings)", ip.String())
|
|
}
|
|
}
|
|
}
|
|
|
|
// Block common metadata service endpoints (cloud providers)
|
|
metadataHosts := []string{
|
|
"169.254.169.254", // AWS, Azure, GCP metadata
|
|
"metadata.google.internal",
|
|
"metadata.goog",
|
|
}
|
|
for _, metadataHost := range metadataHosts {
|
|
if host == metadataHost {
|
|
return fmt.Errorf("webhook URLs pointing to cloud metadata services are not allowed")
|
|
}
|
|
}
|
|
|
|
// Ensure hostname is not just an IP address without proper DNS
|
|
// This helps prevent SSRF attacks using numeric IPs to bypass filters
|
|
if u.Scheme == "https" && isNumericIP(host) {
|
|
log.Warn().
|
|
Str("url", webhookURL).
|
|
Msg("webhook URL uses numeric IP with HTTPS - certificate validation may fail")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// webhookPrivateIPNets lists the private and special-use networks that should
// not be reachable via webhooks. The CIDR literals are parsed once when the
// package is initialised instead of on every isPrivateIP call.
var webhookPrivateIPNets = func() []*net.IPNet {
	cidrs := []string{
		"10.0.0.0/8",      // RFC1918
		"172.16.0.0/12",   // RFC1918
		"192.168.0.0/16",  // RFC1918
		"127.0.0.0/8",     // Loopback
		"169.254.0.0/16",  // Link-local
		"100.64.0.0/10",   // CGNAT (RFC6598)
		"198.18.0.0/15",   // Benchmarking (RFC2544)
		"0.0.0.0/8",       // "This" network (RFC1122)
		"192.0.0.0/24",    // IETF protocol assignments (RFC6890)
		"192.0.2.0/24",    // Documentation TEST-NET-1 (RFC5737)
		"198.51.100.0/24", // Documentation TEST-NET-2 (RFC5737)
		"203.0.113.0/24",  // Documentation TEST-NET-3 (RFC5737)
		"::1/128",         // IPv6 loopback
		"::/128",          // IPv6 unspecified (RFC4291)
		"fe80::/10",       // IPv6 link-local
		"fc00::/7",        // IPv6 unique local
	}

	nets := make([]*net.IPNet, 0, len(cidrs))
	for _, cidr := range cidrs {
		_, ipNet, err := net.ParseCIDR(cidr)
		if err != nil {
			// An unparseable literal is skipped rather than aborting
			// start-up.
			continue
		}
		nets = append(nets, ipNet)
	}
	return nets
}()

// isPrivateIP reports whether ip falls inside any of the private or
// special-use ranges in webhookPrivateIPNets. A nil ip matches no range and
// yields false.
func isPrivateIP(ip net.IP) bool {
	for _, ipNet := range webhookPrivateIPNets {
		if ipNet.Contains(ip) {
			return true
		}
	}
	return false
}
|
|
|
|
// isNumericIP reports whether host looks like a literal IP address rather
// than a DNS name.
//
// Anything net.ParseIP accepts counts, which covers IPv6 literals containing
// hexadecimal letters (for example "fe80::1"). As a fallback, a string made
// up only of digits, dots and colons with at least one dot or colon is still
// treated as numeric even if it is not a well-formed address.
func isNumericIP(host string) bool {
	if host == "" {
		return false
	}
	if net.ParseIP(host) != nil {
		return true
	}

	// Fallback heuristic: only digits, dots and colons.
	for _, char := range host {
		if (char < '0' || char > '9') && char != '.' && char != ':' {
			return false
		}
	}
	return strings.Contains(host, ".") || strings.Contains(host, ":")
}
|
|
|
|
// ParseAllowedPrivateCIDRs validates a comma-separated list of CIDR ranges
// and bare IP addresses and returns the parsed networks. It does NOT modify
// any runtime state — use ApplyAllowedPrivateCIDRs to commit the result.
//
// Entries are trimmed and empty entries are ignored. A bare IP address
// becomes a single-host network: /32 for IPv4 (including IPv4-mapped IPv6
// spellings such as "::ffff:10.0.0.1") and /128 for IPv6. An empty input
// yields (nil, nil). The first invalid entry aborts parsing with an error.
func ParseAllowedPrivateCIDRs(cidrsString string) ([]*net.IPNet, error) {
	if cidrsString == "" {
		return nil, nil
	}

	var parsedNets []*net.IPNet
	for _, entry := range strings.Split(cidrsString, ",") {
		entry = strings.TrimSpace(entry)
		if entry == "" {
			continue
		}

		// Bare IPs are turned into single-host networks directly from the
		// parsed address. Appending "/32" to the text would mis-parse an
		// IPv4-mapped IPv6 spelling as the IPv6 network ::/32.
		if !strings.Contains(entry, "/") {
			ip := net.ParseIP(entry)
			if ip == nil {
				return nil, fmt.Errorf("invalid IP address: %s", entry)
			}
			if ip4 := ip.To4(); ip4 != nil {
				parsedNets = append(parsedNets, &net.IPNet{IP: ip4, Mask: net.CIDRMask(32, 32)})
			} else {
				parsedNets = append(parsedNets, &net.IPNet{IP: ip, Mask: net.CIDRMask(128, 128)})
			}
			continue
		}

		_, ipNet, err := net.ParseCIDR(entry)
		if err != nil {
			return nil, fmt.Errorf("invalid CIDR range %s: %w", entry, err)
		}
		parsedNets = append(parsedNets, ipNet)
	}

	return parsedNets, nil
}
|
|
|
|
// ApplyAllowedPrivateCIDRs atomically replaces the runtime allowlist with
|
|
// pre-validated networks (from ParseAllowedPrivateCIDRs).
|
|
func (n *NotificationManager) ApplyAllowedPrivateCIDRs(cidrsString string, nets []*net.IPNet) {
|
|
n.allowedPrivateMu.Lock()
|
|
defer n.allowedPrivateMu.Unlock()
|
|
|
|
n.allowedPrivateNets = nets
|
|
if len(nets) == 0 {
|
|
log.Info().Msg("webhook private IP allowlist cleared - all private IPs blocked")
|
|
} else {
|
|
log.Info().
|
|
Str("cidrs", cidrsString).
|
|
Int("count", len(nets)).
|
|
Msg("webhook private IP allowlist updated")
|
|
}
|
|
}
|
|
|
|
// UpdateAllowedPrivateCIDRs validates and applies CIDR changes in one call.
|
|
// Kept for backward compatibility; prefer ParseAllowedPrivateCIDRs + ApplyAllowedPrivateCIDRs
|
|
// when you need persist-before-mutate semantics.
|
|
func (n *NotificationManager) UpdateAllowedPrivateCIDRs(cidrsString string) error {
|
|
nets, err := ParseAllowedPrivateCIDRs(cidrsString)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
n.ApplyAllowedPrivateCIDRs(cidrsString, nets)
|
|
return nil
|
|
}
|
|
|
|
// isIPInAllowlist checks if an IP is in the configured allowlist
|
|
func (n *NotificationManager) isIPInAllowlist(ip net.IP) bool {
|
|
n.allowedPrivateMu.RLock()
|
|
defer n.allowedPrivateMu.RUnlock()
|
|
|
|
// No allowlist means block all private IPs
|
|
if len(n.allowedPrivateNets) == 0 {
|
|
return false
|
|
}
|
|
|
|
// Check if IP is in any allowed range
|
|
for _, ipNet := range n.allowedPrivateNets {
|
|
if ipNet.Contains(ip) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// addWebhookDelivery adds a webhook delivery record to the history
|
|
func (n *NotificationManager) addWebhookDelivery(delivery WebhookDelivery) {
|
|
n.mu.Lock()
|
|
defer n.mu.Unlock()
|
|
|
|
// Add to history
|
|
n.webhookHistory = append(n.webhookHistory, delivery)
|
|
|
|
// Keep only last 100 entries
|
|
if len(n.webhookHistory) > 100 {
|
|
// Remove oldest entry
|
|
n.webhookHistory = n.webhookHistory[1:]
|
|
}
|
|
}
|
|
|
|
// GetWebhookHistory returns recent webhook delivery history
|
|
func (n *NotificationManager) GetWebhookHistory() []WebhookDelivery {
|
|
n.mu.RLock()
|
|
defer n.mu.RUnlock()
|
|
|
|
// Return a copy to avoid concurrent access issues
|
|
history := make([]WebhookDelivery, len(n.webhookHistory))
|
|
copy(history, n.webhookHistory)
|
|
return history
|
|
}
|
|
|
|
func buildNotificationTestAlert() *alerts.Alert {
|
|
return &alerts.Alert{
|
|
ID: "test-alert",
|
|
Type: "cpu",
|
|
Level: "warning",
|
|
ResourceID: "test-resource",
|
|
ResourceName: "Test Resource",
|
|
Node: "pve-node-01",
|
|
Instance: "https://192.168.1.100:8006",
|
|
Message: "This is a test alert from Pulse Monitoring to verify your notification settings are working correctly",
|
|
Value: 95.5,
|
|
Threshold: 90,
|
|
StartTime: time.Now().Add(-5 * time.Minute), // Show it's been active for 5 minutes
|
|
LastSeen: time.Now(),
|
|
Metadata: map[string]interface{}{
|
|
"resourceType": "vm",
|
|
},
|
|
}
|
|
}
|
|
|
|
// GetQueueStats returns statistics about the notification queue
|
|
func (n *NotificationManager) GetQueueStats() (map[string]int, error) {
|
|
n.mu.RLock()
|
|
queue := n.queue
|
|
n.mu.RUnlock()
|
|
|
|
if queue == nil {
|
|
return nil, fmt.Errorf("notification queue not initialized")
|
|
}
|
|
return queue.GetQueueStats()
|
|
}
|
|
|
|
// SendTestNotification sends a test notification
|
|
func (n *NotificationManager) SendTestNotification(method string) error {
|
|
testAlert := buildNotificationTestAlert()
|
|
|
|
switch method {
|
|
case "email":
|
|
log.Info().
|
|
Bool("enabled", n.emailConfig.Enabled).
|
|
Str("smtp", n.emailConfig.SMTPHost).
|
|
Int("port", n.emailConfig.SMTPPort).
|
|
Str("from", n.emailConfig.From).
|
|
Int("toCount", len(n.emailConfig.To)).
|
|
Msg("testing email notification")
|
|
if !n.emailConfig.Enabled {
|
|
return fmt.Errorf("email notifications are not enabled")
|
|
}
|
|
return n.sendSingleEmailWithError(testAlert, n.emailConfig)
|
|
case "webhook":
|
|
n.mu.RLock()
|
|
if len(n.webhooks) == 0 {
|
|
n.mu.RUnlock()
|
|
return fmt.Errorf("no webhooks configured")
|
|
}
|
|
// Find first enabled webhook and copy it before releasing lock
|
|
var webhookToTest *WebhookConfig
|
|
for _, webhook := range n.webhooks {
|
|
if webhook.Enabled {
|
|
// Copy webhook to avoid race condition
|
|
webhookCopy := webhook
|
|
webhookToTest = &webhookCopy
|
|
break
|
|
}
|
|
}
|
|
n.mu.RUnlock()
|
|
|
|
if webhookToTest == nil {
|
|
return fmt.Errorf("no enabled webhooks found")
|
|
}
|
|
return n.SendTestWebhook(*webhookToTest)
|
|
case "apprise":
|
|
n.mu.RLock()
|
|
appriseConfig := n.appriseConfig
|
|
n.mu.RUnlock()
|
|
|
|
log.Info().
|
|
Bool("enabled", appriseConfig.Enabled).
|
|
Str("mode", string(appriseConfig.Mode)).
|
|
Int("targetCount", len(appriseConfig.Targets)).
|
|
Msg("testing Apprise notification")
|
|
|
|
if !appriseConfig.Enabled {
|
|
return fmt.Errorf("apprise notifications are not enabled")
|
|
}
|
|
|
|
// Use sendGroupedApprise with a single test alert
|
|
return n.sendGroupedApprise(appriseConfig, []*alerts.Alert{testAlert})
|
|
default:
|
|
return fmt.Errorf("unknown notification method: %s", method)
|
|
}
|
|
}
|
|
|
|
// SendTestAppriseWithConfig sends a test Apprise notification using provided config
|
|
func (n *NotificationManager) SendTestAppriseWithConfig(config AppriseConfig) error {
|
|
cfg := NormalizeAppriseConfig(config)
|
|
|
|
log.Info().
|
|
Bool("enabled", cfg.Enabled).
|
|
Str("mode", string(cfg.Mode)).
|
|
Int("targetCount", len(cfg.Targets)).
|
|
Str("serverURL", cfg.ServerURL).
|
|
Msg("testing Apprise notification with provided config")
|
|
|
|
if !cfg.Enabled {
|
|
switch cfg.Mode {
|
|
case AppriseModeCLI:
|
|
return fmt.Errorf("apprise notifications are not enabled in the provided configuration: at least one target is required for CLI mode")
|
|
case AppriseModeHTTP:
|
|
return fmt.Errorf("apprise notifications are not enabled in the provided configuration: server URL is required for API mode")
|
|
default:
|
|
return fmt.Errorf("apprise notifications are not enabled in the provided configuration")
|
|
}
|
|
}
|
|
|
|
return n.sendGroupedApprise(cfg, []*alerts.Alert{buildNotificationTestAlert()})
|
|
}
|
|
|
|
// SendTestWebhook sends a test notification to a specific webhook
|
|
func (n *NotificationManager) SendTestWebhook(webhook WebhookConfig) error {
|
|
_, _, err := n.TestEnhancedWebhook(BuildEnhancedWebhookTestConfig(webhook, webhook.Service))
|
|
return err
|
|
}
|
|
|
|
// SendTestNotificationWithConfig sends a test notification using provided config
|
|
func (n *NotificationManager) SendTestNotificationWithConfig(method string, config *EmailConfig, nodeInfo *TestNodeInfo) error {
|
|
// Use actual node info if provided, otherwise use defaults
|
|
nodeName := "test-node"
|
|
instanceURL := n.publicURL
|
|
if instanceURL == "" {
|
|
instanceURL = "https://proxmox.local:8006"
|
|
}
|
|
if nodeInfo != nil {
|
|
if nodeInfo.NodeName != "" {
|
|
nodeName = nodeInfo.NodeName
|
|
}
|
|
if nodeInfo.InstanceURL != "" {
|
|
instanceURL = nodeInfo.InstanceURL
|
|
}
|
|
}
|
|
|
|
testAlert := &alerts.Alert{
|
|
ID: "test-alert",
|
|
Type: "cpu",
|
|
Level: "warning",
|
|
ResourceID: "test-email-config",
|
|
ResourceName: "Email Configuration Test",
|
|
Node: nodeName,
|
|
Instance: instanceURL,
|
|
Message: "This is a test alert to verify your email notification settings are working correctly",
|
|
Value: 85.5,
|
|
Threshold: 80,
|
|
StartTime: time.Now(),
|
|
LastSeen: time.Now(),
|
|
Metadata: map[string]interface{}{
|
|
"resourceType": "test",
|
|
},
|
|
}
|
|
|
|
switch method {
|
|
case "email":
|
|
if config == nil {
|
|
return fmt.Errorf("email configuration is required")
|
|
}
|
|
|
|
log.Info().
|
|
Bool("enabled", config.Enabled).
|
|
Str("smtp", config.SMTPHost).
|
|
Int("port", config.SMTPPort).
|
|
Str("from", config.From).
|
|
Int("toCount", len(config.To)).
|
|
Strs("to", config.To).
|
|
Bool("smtpEmpty", config.SMTPHost == "").
|
|
Bool("fromEmpty", config.From == "").
|
|
Msg("testing email notification with provided config")
|
|
|
|
if !config.Enabled {
|
|
return fmt.Errorf("email notifications are not enabled in the provided configuration")
|
|
}
|
|
|
|
if config.SMTPHost == "" || config.From == "" {
|
|
return fmt.Errorf("email configuration is incomplete: SMTP host and from address are required")
|
|
}
|
|
|
|
return n.sendSingleEmailWithError(testAlert, *config)
|
|
|
|
default:
|
|
return fmt.Errorf("unsupported method for config-based testing: %s", method)
|
|
}
|
|
}
|
|
|
|
func normalizeQueueType(notifType string) (string, notificationEvent) {
|
|
if strings.HasSuffix(notifType, queueTypeSuffixResolved) {
|
|
return strings.TrimSuffix(notifType, queueTypeSuffixResolved), eventResolved
|
|
}
|
|
return notifType, eventAlert
|
|
}
|
|
|
|
func resolvedTimeFromAlerts(alerts []*alerts.Alert) time.Time {
|
|
for _, alert := range alerts {
|
|
if alert == nil || alert.Metadata == nil {
|
|
continue
|
|
}
|
|
raw, ok := alert.Metadata[metadataResolvedAt]
|
|
if !ok {
|
|
continue
|
|
}
|
|
switch ts := raw.(type) {
|
|
case string:
|
|
if parsed, err := time.Parse(time.RFC3339, ts); err == nil {
|
|
return parsed
|
|
}
|
|
case float64:
|
|
if ts > 0 {
|
|
return time.Unix(int64(ts), 0)
|
|
}
|
|
}
|
|
}
|
|
|
|
return time.Now()
|
|
}
|
|
|
|
// ProcessQueuedNotification processes a notification from the persistent queue
|
|
func (n *NotificationManager) ProcessQueuedNotification(notif *QueuedNotification) error {
|
|
baseType, event := normalizeQueueType(notif.Type)
|
|
|
|
log.Debug().
|
|
Str("notificationID", notif.ID).
|
|
Str("type", baseType).
|
|
Str("event", string(event)).
|
|
Int("alertCount", len(notif.Alerts)).
|
|
Msg("processing queued notification")
|
|
|
|
var err error
|
|
switch baseType {
|
|
case "email":
|
|
var emailConfig EmailConfig
|
|
if err = json.Unmarshal(notif.Config, &emailConfig); err != nil {
|
|
return fmt.Errorf("failed to unmarshal email config: %w", err)
|
|
}
|
|
currentEmailConfig := n.GetEmailConfig()
|
|
if !n.IsEnabled() || !currentEmailConfig.Enabled {
|
|
log.Info().
|
|
Str("notificationID", notif.ID).
|
|
Str("type", baseType).
|
|
Str("event", string(event)).
|
|
Msg("skipping queued email notification because email delivery is disabled")
|
|
return nil
|
|
}
|
|
err = n.deliverNotificationJob(notificationDeliveryJob{
|
|
Type: "email",
|
|
Event: event,
|
|
Alerts: notif.Alerts,
|
|
ResolvedAt: resolvedTimeFromAlerts(notif.Alerts),
|
|
EmailConfig: &emailConfig,
|
|
})
|
|
|
|
case "webhook":
|
|
var webhookConfig WebhookConfig
|
|
if err = json.Unmarshal(notif.Config, &webhookConfig); err != nil {
|
|
return fmt.Errorf("failed to unmarshal webhook config: %w", err)
|
|
}
|
|
if !n.IsEnabled() || !n.isQueuedWebhookStillEnabled(webhookConfig) {
|
|
log.Info().
|
|
Str("notificationID", notif.ID).
|
|
Str("type", baseType).
|
|
Str("event", string(event)).
|
|
Str("webhookID", webhookConfig.ID).
|
|
Msg("skipping queued webhook notification because delivery is disabled")
|
|
return nil
|
|
}
|
|
err = n.deliverNotificationJob(notificationDeliveryJob{
|
|
Type: "webhook",
|
|
Event: event,
|
|
Alerts: notif.Alerts,
|
|
ResolvedAt: resolvedTimeFromAlerts(notif.Alerts),
|
|
WebhookConfig: &webhookConfig,
|
|
})
|
|
|
|
case "apprise":
|
|
var appriseConfig AppriseConfig
|
|
if err = json.Unmarshal(notif.Config, &appriseConfig); err != nil {
|
|
return fmt.Errorf("failed to unmarshal apprise config: %w", err)
|
|
}
|
|
currentAppriseConfig := n.GetAppriseConfig()
|
|
if !n.IsEnabled() || !currentAppriseConfig.Enabled {
|
|
log.Info().
|
|
Str("notificationID", notif.ID).
|
|
Str("type", baseType).
|
|
Str("event", string(event)).
|
|
Msg("skipping queued Apprise notification because delivery is disabled")
|
|
return nil
|
|
}
|
|
err = n.deliverNotificationJob(notificationDeliveryJob{
|
|
Type: "apprise",
|
|
Event: event,
|
|
Alerts: notif.Alerts,
|
|
ResolvedAt: resolvedTimeFromAlerts(notif.Alerts),
|
|
AppriseConfig: &appriseConfig,
|
|
})
|
|
|
|
default:
|
|
return fmt.Errorf("unknown notification type: %s", baseType)
|
|
}
|
|
|
|
// Mark cooldown after successful send for active alerts only
|
|
if err == nil && event == eventAlert {
|
|
n.markAlertsNotified(notif.Alerts, time.Now())
|
|
}
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("process queued %s notification %q (%s): %w", baseType, notif.ID, event, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (n *NotificationManager) isQueuedWebhookStillEnabled(queuedWebhook WebhookConfig) bool {
|
|
if !queuedWebhook.Enabled {
|
|
return false
|
|
}
|
|
|
|
currentWebhooks := n.GetWebhooks()
|
|
if len(currentWebhooks) == 0 {
|
|
return false
|
|
}
|
|
|
|
for _, current := range currentWebhooks {
|
|
if queuedWebhook.ID != "" && current.ID == queuedWebhook.ID {
|
|
return current.Enabled
|
|
}
|
|
}
|
|
|
|
if queuedWebhook.URL == "" {
|
|
return false
|
|
}
|
|
|
|
for _, current := range currentWebhooks {
|
|
if current.URL == queuedWebhook.URL {
|
|
return current.Enabled
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// cleanupOldNotificationRecords periodically cleans up old entries from lastNotified map
|
|
func (n *NotificationManager) cleanupOldNotificationRecords() {
|
|
if n.cleanupDone != nil {
|
|
defer close(n.cleanupDone)
|
|
}
|
|
|
|
ticker := time.NewTicker(1 * time.Hour)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
n.mu.Lock()
|
|
now := time.Now()
|
|
cutoff := now.Add(-24 * time.Hour)
|
|
cleaned := 0
|
|
|
|
for alertID, record := range n.lastNotified {
|
|
// Remove entries older than 24 hours
|
|
if record.lastSent.Before(cutoff) {
|
|
delete(n.lastNotified, alertID)
|
|
cleaned++
|
|
}
|
|
}
|
|
|
|
if cleaned > 0 {
|
|
log.Debug().
|
|
Int("cleaned", cleaned).
|
|
Int("remaining", len(n.lastNotified)).
|
|
Msg("cleaned up old notification cooldown records")
|
|
}
|
|
n.mu.Unlock()
|
|
case <-n.stopCleanup:
|
|
// Stop cleanup when manager is stopped
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Stop gracefully stops the notification manager
|
|
func (n *NotificationManager) Stop() {
|
|
n.stopOnce.Do(func() {
|
|
n.mu.Lock()
|
|
n.enabled = false
|
|
queue := n.queue
|
|
cleanupDone := n.cleanupDone
|
|
client := n.webhookClient
|
|
|
|
// Stop cleanup goroutine. Do not nil out the channel — the cleanup
|
|
// goroutine reads n.stopCleanup in its select loop, and nilling it
|
|
// between close and the goroutine re-entering select causes a race
|
|
// where the goroutine blocks on a nil channel forever.
|
|
if n.stopCleanup != nil {
|
|
close(n.stopCleanup)
|
|
}
|
|
|
|
// Nil out queue before unlocking
|
|
n.queue = nil
|
|
n.mu.Unlock()
|
|
|
|
// Wait for cleanup goroutine outside the lock to avoid deadlock.
|
|
if cleanupDone != nil {
|
|
<-cleanupDone
|
|
}
|
|
|
|
// Stop the notification queue if it exists
|
|
if queue != nil {
|
|
if err := queue.Stop(); err != nil {
|
|
log.Warn().Err(err).Msg("Notification queue stop returned error")
|
|
}
|
|
}
|
|
|
|
// Explicitly release pooled webhook connections during shutdown.
|
|
if client != nil {
|
|
client.CloseIdleConnections()
|
|
}
|
|
|
|
log.Info().Msg("NotificationManager stopped")
|
|
})
|
|
}
|