mirror of https://github.com/rcourtman/Pulse.git
synced 2026-05-07 00:37:36 +00:00

9851 lines · 308 KiB · Go

package api

import (
	"bufio"
	"context"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"hash/fnv"
	"io"
	"math"
	"net"
	"net/http"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/rcourtman/pulse-go-rewrite/internal/agentexec"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/adapters"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/baseline"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/chat"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/circuit"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/forecast"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/knowledge"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/learning"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/proxmox"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/tools"
	"github.com/rcourtman/pulse-go-rewrite/internal/ai/unified"
	"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
	"github.com/rcourtman/pulse-go-rewrite/internal/config"
	"github.com/rcourtman/pulse-go-rewrite/internal/deploy"
	"github.com/rcourtman/pulse-go-rewrite/internal/metrics"
	"github.com/rcourtman/pulse-go-rewrite/internal/mock"
	"github.com/rcourtman/pulse-go-rewrite/internal/models"
	"github.com/rcourtman/pulse-go-rewrite/internal/monitoring"
	recoverymanager "github.com/rcourtman/pulse-go-rewrite/internal/recovery/manager"
	"github.com/rcourtman/pulse-go-rewrite/internal/relay"
	"github.com/rcourtman/pulse-go-rewrite/internal/servicediscovery"
	"github.com/rcourtman/pulse-go-rewrite/internal/telemetry"
	"github.com/rcourtman/pulse-go-rewrite/internal/truenas"
	unifiedresources "github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
	"github.com/rcourtman/pulse-go-rewrite/internal/updates"
	"github.com/rcourtman/pulse-go-rewrite/internal/utils"
	"github.com/rcourtman/pulse-go-rewrite/internal/vmware"
	"github.com/rcourtman/pulse-go-rewrite/internal/websocket"
	"github.com/rcourtman/pulse-go-rewrite/pkg/aicontracts"
	"github.com/rcourtman/pulse-go-rewrite/pkg/auth"
	internalauth "github.com/rcourtman/pulse-go-rewrite/pkg/auth"
	"github.com/rcourtman/pulse-go-rewrite/pkg/extensions"
	metricstore "github.com/rcourtman/pulse-go-rewrite/pkg/metrics"
	"github.com/rs/zerolog/log"
)

// Router handles HTTP routing
type Router struct {
	mux                        *http.ServeMux
	config                     *config.Config
	monitor                    *monitoring.Monitor            // Legacy/Default support
	mtMonitor                  *monitoring.MultiTenantMonitor // Multi-tenant manager
	alertHandlers              *AlertHandlers
	configHandlers             *ConfigHandlers
	trueNASHandlers            *TrueNASHandlers
	vmwareHandlers             *VMwareHandlers
	notificationHandlers       *NotificationHandlers
	notificationQueueHandlers  *NotificationQueueHandlers
	dockerAgentHandlers        *DockerAgentHandlers
	kubernetesAgentHandlers    *KubernetesAgentHandlers
	unifiedAgentHandlers       *UnifiedAgentHandlers
	systemSettingsHandler      *SystemSettingsHandler
	aiSettingsHandler          *AISettingsHandler
	aiHandler                  *AIHandler // AI chat handler
	discoveryHandlers          *DiscoveryHandlers
	resourceHandlers           *ResourceHandlers
	resourceRegistry           *unifiedresources.ResourceRegistry
	trueNASPoller              *monitoring.TrueNASPoller
	vmwarePoller               *monitoring.VMwarePoller
	monitorResourceAdapter     *unifiedresources.MonitorAdapter
	monitorResourceAdapters    map[string]*unifiedresources.MonitorAdapter
	monitorAdapterMu           sync.Mutex
	monitorSupplementalRecords map[unifiedresources.DataSource]monitoring.MonitorSupplementalRecordsProvider
	reportingHandlers          *ReportingHandlers
	configProfileHandler       *ConfigProfileHandler
	licenseHandlers            *LicenseHandlers
	recoveryHandlers           *RecoveryHandlers
	rbacProvider               *TenantRBACProvider
	logHandlers                *LogHandlers
	agentExecServer            *agentexec.Server
	deployHandlers             *DeployHandlers
	deployStore                *deploy.Store
	wsHub                      *websocket.Hub
	reloadFunc                 func() error
	updateManager              *updates.Manager
	updateHistory              *updates.UpdateHistory
	exportLimiter              *RateLimiter
	downloadLimiter            *RateLimiter
	signupRateLimiter          *RateLimiter
	handoffExchangeRateLimiter *RateLimiter
	tenantRateLimiter          *TenantRateLimiter
	persistence                *config.ConfigPersistence
	multiTenant                *config.MultiTenantPersistence
	oidcMu                     sync.Mutex
	oidcService                *OIDCService
	oidcManager                *OIDCServiceManager
	samlManager                *SAMLServiceManager
	ssoConfig                  *config.SSOConfig
	sessionStore               *SessionStore
	csrfStore                  *CSRFTokenStore
	recoveryTokenStore         *RecoveryTokenStore
	authorizer                 auth.Authorizer
	wrapped                    http.Handler
	serverVersion              string
	projectRoot                string
	// Cached system settings to avoid loading from disk on every request
	settingsMu               sync.RWMutex
	cachedAllowEmbedding     bool
	cachedAllowedOrigins     string
	publicURLMu              sync.Mutex
	publicURLDetected        bool
	bootstrapTokenHash       string
	bootstrapTokenPath       string
	checksumMu               sync.RWMutex
	checksumCache            map[string]checksumCacheEntry
	installScriptClient      *http.Client
	relayMu                  sync.RWMutex
	relayClient              *relay.Client
	relayCancel              context.CancelFunc
	lifecycleCtx             context.Context
	lifecycleCancel          context.CancelFunc
	hostedMode               bool
	conversionStore          *conversionStore
	stripeWebhookHandlers    *StripeWebhookHandlers
	patrolLifecycleMu        sync.Mutex
	startedPatrolOrgs        map[string]bool
	aiAutoFixEndpoints       extensions.AIAutoFixEndpoints
	aiAlertAnalysisEndpoints extensions.AIAlertAnalysisEndpoints
}

func pulseBinDir() string {
	if dir := strings.TrimSpace(os.Getenv("PULSE_BIN_DIR")); dir != "" {
		return dir
	}
	return "/opt/pulse/bin"
}

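// Resolution sketch for pulseBinDir (the override value below is illustrative):
//
//	PULSE_BIN_DIR unset or blank     -> "/opt/pulse/bin" (default)
//	PULSE_BIN_DIR="/usr/local/pulse" -> "/usr/local/pulse"
//	PULSE_BIN_DIR="   "              -> "/opt/pulse/bin" (value is trimmed to empty)
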
func storageChartsSelectedNodeName(resource unifiedresources.Resource) string {
	if name := strings.TrimSpace(resource.Name); name != "" {
		return name
	}
	if resource.TrueNAS != nil {
		if hostname := strings.TrimSpace(resource.TrueNAS.Hostname); hostname != "" {
			return hostname
		}
	}
	for _, hostname := range resource.Identity.Hostnames {
		if hostname = strings.TrimSpace(hostname); hostname != "" {
			return hostname
		}
	}
	return ""
}

func storageChartsSelectedNodeInstance(resource unifiedresources.Resource) string {
	if resource.Proxmox == nil {
		return ""
	}
	return strings.TrimSpace(resource.Proxmox.Instance)
}

func isDirectLoopbackRequest(req *http.Request) bool {
	if req == nil {
		return false
	}

	remote := extractRemoteIP(req.RemoteAddr)
	ip := net.ParseIP(remote)
	if ip == nil || !ip.IsLoopback() {
		return false
	}

	if req.Header.Get("X-Forwarded-For") != "" ||
		req.Header.Get("Forwarded") != "" ||
		req.Header.Get("X-Real-IP") != "" {
		return false
	}

	return true
}

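// Classification sketch for isDirectLoopbackRequest: the connection must come
// from a loopback address AND carry no proxy-forwarding headers (addresses
// below are illustrative):
//
//	RemoteAddr 127.0.0.1:54321, no forwarding headers -> true
//	RemoteAddr 127.0.0.1:54321, X-Forwarded-For set   -> false (proxied)
//	RemoteAddr 192.168.1.10:443                       -> false (not loopback)
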
// NewRouter creates a new router instance
func NewRouter(cfg *config.Config, monitor *monitoring.Monitor, mtMonitor *monitoring.MultiTenantMonitor, wsHub *websocket.Hub, reloadFunc func() error, serverVersion string, conversionStores ...*conversionStore) *Router {
	var store *conversionStore
	if len(conversionStores) > 0 {
		store = conversionStores[0]
	}

	// Initialize persistent auth stores and capture the exact workers this router owns.
	sessionStore := ensureSessionStore(cfg.DataPath)
	csrfStore := ensureCSRFStore(cfg.DataPath)

	updateHistory, err := updates.NewUpdateHistory(cfg.DataPath)
	if err != nil {
		log.Error().Err(err).Msg("Failed to initialize update history")
	}

	projectRoot, err := os.Getwd()
	if err != nil {
		projectRoot = "."
	}

	updateManager := updates.NewManager(cfg)
	updateManager.SetHistory(updateHistory)
	lifecycleCtx, lifecycleCancel := context.WithCancel(context.Background())

	r := &Router{
		mux:                        http.NewServeMux(),
		config:                     cfg,
		monitor:                    monitor,
		mtMonitor:                  mtMonitor,
		wsHub:                      wsHub,
		reloadFunc:                 reloadFunc,
		updateManager:              updateManager,
		updateHistory:              updateHistory,
		exportLimiter:              NewRateLimiter(5, 1*time.Minute),  // 5 attempts per minute
		downloadLimiter:            NewRateLimiter(60, 1*time.Minute), // downloads/installers per minute per IP
		signupRateLimiter:          NewRateLimiter(5, 1*time.Hour),    // signup attempts per hour per IP
		handoffExchangeRateLimiter: NewRateLimiter(20, 1*time.Minute), // cloud handoff token exchanges per minute per IP
		persistence:                config.NewConfigPersistence(cfg.DataPath),
		multiTenant:                config.NewMultiTenantPersistence(cfg.DataPath),
		sessionStore:               sessionStore,
		csrfStore:                  csrfStore,
		authorizer:                 auth.GetAuthorizer(),
		serverVersion:              strings.TrimSpace(serverVersion),
		projectRoot:                projectRoot,
		checksumCache:              make(map[string]checksumCacheEntry),
		lifecycleCtx:               lifecycleCtx,
		lifecycleCancel:            lifecycleCancel,
		hostedMode:                 os.Getenv("PULSE_HOSTED_MODE") == "true",
		conversionStore:            store,
		monitorResourceAdapters:    make(map[string]*unifiedresources.MonitorAdapter),
		monitorSupplementalRecords: make(map[unifiedresources.DataSource]monitoring.MonitorSupplementalRecordsProvider),
		startedPatrolOrgs:          make(map[string]bool),
	}
	if r.wsHub != nil {
		r.wsHub.SetTrustedProxyChecker(isTrustedProxyIP)
	}
	if r.hostedMode {
		// Use defaults: 2000 req/min per org.
		r.tenantRateLimiter = NewTenantRateLimiter(0, 0)
	}
	r.resourceRegistry = unifiedresources.NewRegistry(nil)
	r.monitorResourceAdapter = unifiedresources.NewMonitorAdapter(r.resourceRegistry)

	// Sync the configured admin user to the authorizer (if supported)
	if cfg.AuthUser != "" {
		auth.SetAdminUser(cfg.AuthUser)
	}

	// Initialize SSO service managers
	r.oidcManager = NewOIDCServiceManager()
	r.samlManager = NewSAMLServiceManager("")

	r.initializeBootstrapToken()

	r.setupRoutes()
	log.Debug().Msg("Routes registered successfully")

	// Start forwarding update progress to WebSocket
	go r.forwardUpdateProgress()

	// Start background update checker
	go r.backgroundUpdateChecker(r.lifecycleCtx)

	// Load system settings once at startup and cache them
	r.reloadSystemSettings()

	// Get cached values for middleware configuration
	r.settingsMu.RLock()
	allowEmbedding := r.cachedAllowEmbedding
	allowedOrigins := r.cachedAllowedOrigins
	r.settingsMu.RUnlock()

	// Apply middleware chain:
	//  1. Universal rate limiting (outermost, to stop attacks early)
	//  2. Auth context extraction (populates user/token in context)
	//  3. Tenant selection and authorization (uses auth context)
	//  4. Demo mode (read-only protection)
	//  5. Error handling
	//  6. Security headers with embedding configuration
	// Note: TimeoutHandler breaks WebSocket upgrades
	devMode := utils.GetenvTrim("FRONTEND_DEV_SERVER") != ""
	handler := SecurityHeadersWithConfig(r, allowEmbedding, allowedOrigins, devMode)
	handler = ErrorHandler(handler)
	handler = DemoModeMiddleware(cfg, handler)

	// Create tenant middleware with authorization checker.
	// In hosted mode, tenant routing uses subscription lifecycle checks instead of FeatureMultiTenant.
	var orgLoader OrganizationLoader
	if r.multiTenant != nil {
		orgLoader = NewMultiTenantOrganizationLoader(r.multiTenant)
	}
	authChecker := NewAuthorizationChecker(orgLoader)
	tenantMiddleware := NewTenantMiddlewareWithConfig(TenantMiddlewareConfig{
		Persistence: r.multiTenant,
		AuthChecker: authChecker,
		HostedMode:  r.hostedMode,
	})

	// Per-tenant rate limiting (hosted mode only).
	// This relies on the org ID stored in context by TenantMiddleware; because the chain is built inside-out,
	// it must be wrapped before TenantMiddleware so that TenantMiddleware runs first.
	if r.tenantRateLimiter != nil {
		handler = TenantRateLimitMiddleware(r.tenantRateLimiter)(handler)
	}
	// Security: fail closed for non-default org requests when tenant monitor resolution fails.
	// Wrapped before TenantMiddleware so TenantMiddleware executes first and sets the org context.
	handler = r.tenantMonitorGuardMiddleware(handler)
	handler = tenantMiddleware.Middleware(handler)

	// Auth context middleware extracts user/token info BEFORE tenant middleware
	handler = AuthContextMiddleware(cfg, r.mtMonitor, handler)

	handler = UniversalRateLimitMiddlewareWithConfig(newEndpointRateLimitConfig(), handler)
	r.wrapped = handler
	return r
}

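// The chain above is assembled inside-out, so at request time the outermost
// wrapper runs first. A minimal sketch of the effective order, derived from
// the wrapping sequence in NewRouter:
//
//	UniversalRateLimit
//	  -> AuthContext             (user/token into context)
//	    -> TenantMiddleware      (org ID into context)
//	      -> tenantMonitorGuard  (fails closed if the tenant monitor is missing)
//	        -> TenantRateLimit   (hosted mode only)
//	          -> DemoMode -> ErrorHandler -> SecurityHeaders -> Router
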
func (r *Router) tenantMonitorGuardMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		orgID := strings.TrimSpace(GetOrgID(req.Context()))
		if orgID == "" || orgID == "default" {
			next.ServeHTTP(w, req)
			return
		}

		if r.mtMonitor == nil {
			writeErrorResponse(w, http.StatusServiceUnavailable, "tenant_unavailable", "Tenant monitor is not configured", nil)
			return
		}
		monitor, err := r.mtMonitor.GetMonitor(orgID)
		if err != nil || monitor == nil {
			writeErrorResponse(w, http.StatusServiceUnavailable, "tenant_unavailable", "Tenant monitor is not available", nil)
			return
		}

		next.ServeHTTP(w, req)
	})
}

// setupRoutes configures all routes
func (r *Router) setupRoutes() {
	// Create handlers
	r.alertHandlers = NewAlertHandlers(r.mtMonitor, NewAlertMonitorWrapper(r.monitor), r.wsHub)
	r.notificationHandlers = NewNotificationHandlers(r.mtMonitor, NewNotificationMonitorWrapper(r.monitor))
	r.notificationHandlers.SetReadState(r.defaultReadState())
	r.notificationQueueHandlers = NewNotificationQueueHandlers(r.monitor)
	guestMetadataHandler := NewGuestMetadataHandler(r.multiTenant)
	dockerMetadataHandler := NewDockerMetadataHandler(r.multiTenant)
	hostMetadataHandler := NewHostMetadataHandler(r.multiTenant)
	r.configHandlers = NewConfigHandlers(r.multiTenant, r.mtMonitor, r.reloadFunc, r.wsHub, guestMetadataHandler, r.reloadSystemSettings)
	if r.monitor != nil {
		r.configHandlers.SetMonitor(r.monitor)
	}
	r.configHandlers.SetMockModeChangeHook(r.syncPlatformSupplementalProviders)
	r.trueNASHandlers = &TrueNASHandlers{
		getPersistence: r.configHandlers.getPersistence,
		getConfig:      r.configHandlers.getConfig,
		getMonitor:     r.configHandlers.getMonitor,
		getPoller:      func(context.Context) *monitoring.TrueNASPoller { return r.trueNASPoller },
	}
	r.vmwareHandlers = &VMwareHandlers{
		getPersistence: r.configHandlers.getPersistence,
		getMonitor:     r.configHandlers.getMonitor,
		getPoller:      func(context.Context) *monitoring.VMwarePoller { return r.vmwarePoller },
	}
	recoveryManager := recoverymanager.New(r.multiTenant)
	r.recoveryHandlers = NewRecoveryHandlers(recoveryManager)
	if r.mtMonitor != nil {
		r.mtMonitor.SetRecoveryManager(recoveryManager)
	}
	if r.monitor != nil {
		r.monitor.SetRecoveryManager(recoveryManager)
	}
	r.trueNASPoller = monitoring.NewTrueNASPoller(r.multiTenant, 0, recoveryManager)
	r.trueNASPoller.Start(r.lifecycleCtx)
	r.vmwarePoller = monitoring.NewVMwarePoller(r.multiTenant, 0)
	r.vmwarePoller.Start(r.lifecycleCtx)
	updateHandlers := NewUpdateHandlersWithContext(r.updateManager, r.updateHistory, r.lifecycleCtx)
	r.dockerAgentHandlers = NewDockerAgentHandlers(r.mtMonitor, r.monitor, r.wsHub, r.config)
	r.kubernetesAgentHandlers = NewKubernetesAgentHandlers(r.mtMonitor, r.monitor, r.wsHub)
	r.unifiedAgentHandlers = NewUnifiedAgentHandlers(r.mtMonitor, r.monitor, r.wsHub)
	r.kubernetesAgentHandlers.SetRecoveryIngestor(r.recoveryHandlers)
	r.resourceHandlers = NewResourceHandlers(r.config)
	if r.resourceHandlers != nil {
		if store, err := r.resourceHandlers.getStore("default"); err == nil && store != nil {
			r.monitorResourceAdapter = unifiedresources.NewMonitorAdapter(unifiedresources.NewRegistry(store))
		}
	}
	r.syncPlatformSupplementalProviders(mock.IsMockEnabled())
	if r.monitor != nil {
		r.configureMonitorDependencies(r.monitor)
		if r.resourceHandlers != nil {
			r.resourceHandlers.SetStateProvider(r.monitor)
		}
	}
	if r.mtMonitor != nil && r.resourceHandlers != nil {
		r.resourceHandlers.SetTenantStateProvider(NewMultiTenantStateProvider(r.mtMonitor, r.monitor))
	}
	if r.mtMonitor != nil {
		r.mtMonitor.SetMonitorInitializer(r.configureMonitorDependencies)
	}
	r.configProfileHandler = NewConfigProfileHandler(r.multiTenant)
	r.licenseHandlers = NewLicenseHandlers(r.multiTenant, r.hostedMode, r.config)
	r.licenseHandlers.SetMonitors(r.monitor, r.mtMonitor)
	rbacProvider := NewTenantRBACProvider(r.config.DataPath)
	r.rbacProvider = rbacProvider
	orgHandlers := NewOrgHandlers(r.multiTenant, r.mtMonitor, rbacProvider)
	orgHandlers.SetHostedMode(r.hostedMode)
	orgHandlers.SetOnDelete(func(ctx context.Context, orgID string) error {
		return r.CleanupTenant(ctx, orgID)
	})
	// Wire the license service provider so middleware can access per-tenant license services
	SetLicenseServiceProvider(r.licenseHandlers)
	// Wire the base data dir for overflow enforcement (reads OverflowGrantedAt from billing state).
	SetOverflowBaseDataDir(r.config.DataPath)
	r.reportingHandlers = NewReportingHandlers(r.mtMonitor, recoveryManager)
	r.logHandlers = NewLogHandlers(r.config, r.persistence)
	rbacHandlers := NewRBACHandlers(r.config, rbacProvider)
	var magicLinkService *MagicLinkService
	var magicLinkHandlers *MagicLinkHandlers
	if r.hostedMode {
		svc, err := NewMagicLinkServiceForDataPath(r.config.DataPath, nil)
		if err != nil {
			log.Error().Err(err).Msg("Failed to initialize magic link service")
		} else {
			magicLinkService = svc
		}
		magicLinkHandlers = NewMagicLinkHandlers(r.multiTenant, magicLinkService, r.hostedMode, r.resolvePublicURL)
	}

	hostedSignupHandlers := NewHostedSignupHandlers(r.multiTenant, rbacProvider, magicLinkService, r.resolvePublicURL, r.hostedMode)
	r.stripeWebhookHandlers = NewStripeWebhookHandlers(
		config.NewFileBillingStore(r.config.DataPath),
		r.multiTenant,
		rbacProvider,
		magicLinkService,
		r.resolvePublicURL,
		r.hostedMode,
		r.config.DataPath,
	)
	stripeWebhookHandlers := r.stripeWebhookHandlers
	infraUpdateHandlers := NewUpdateDetectionHandlers(r.monitor, r.defaultReadState())
	auditHandlers := NewAuditHandlers()
	auditHandlers.SetResourceStoreProvider(r.resourceHandlers.getStore)

	// System settings and API token management
	r.systemSettingsHandler = NewSystemSettingsHandler(r.config, r.persistence, r.wsHub, r.mtMonitor, r.monitor, r.reloadSystemSettings, r.reloadFunc)

	// Agent execution server for AI tool use
	r.agentExecServer = agentexec.NewServer(func(token string, agentID string) bool {
		// Validate agent tokens using the API tokens system with a scope check
		if r.config == nil {
			return false
		}
		// Check the new API tokens system with scope validation
		if record, ok := r.config.ValidateAPIToken(token); ok {
			// SECURITY: Require agent:exec scope for WebSocket connections
			if !record.HasScope(config.ScopeAgentExec) {
				log.Warn().
					Str("token_id", record.ID).
					Msg("Agent exec token missing required scope: agent:exec")
				return false
			}

			// SECURITY: Check if the token is bound to a specific agent
			if boundID, ok := record.Metadata["bound_agent_id"]; ok && boundID != "" {
				if boundID != agentID {
					log.Warn().
						Str("token_id", record.ID).
						Str("bound_id", boundID).
						Str("requested_id", agentID).
						Msg("Agent token mismatch: token is bound to a different agent ID")
					return false
				}
			}

			return true
		}
		return false
	})

	// Deploy store and handlers for cluster agent deployment
	deployStore, deployErr := deploy.Open(filepath.Join(r.config.DataPath, "deploy.db"))
	if deployErr != nil {
		log.Error().Err(deployErr).Msg("Failed to open deploy store (cluster deployment disabled)")
	}
	if deployStore != nil {
		r.deployStore = deployStore
		if r.monitor != nil {
			reservation := deploy.NewReservationManager()
			r.deployHandlers = NewDeployHandlers(deployStore, r.monitor, r.agentExecServer, reservation, r.resolvePublicURL, r.config, r.persistence)

			SetDeployReservationCounter(func(ctx context.Context) int {
				orgID := GetOrgID(ctx)
				if orgID == "" {
					orgID = "default"
				}
				return reservation.ReservedForOrg(orgID)
			})

			// Periodic cleanup of expired reservations.
			go func() {
				ticker := time.NewTicker(5 * time.Minute)
				defer ticker.Stop()
				for {
					select {
					case <-r.lifecycleCtx.Done():
						return
					case <-ticker.C:
						reservation.CleanExpired()
					}
				}
			}()
		}
	}

	// AI settings endpoints
	r.aiSettingsHandler = NewAISettingsHandler(r.multiTenant, r.mtMonitor, r.agentExecServer)
	r.aiSettingsHandler.SetConfig(r.config)
	// Inject the state provider so AI has access to full infrastructure context (VMs, containers, IPs)
	if r.monitor != nil {
		r.aiSettingsHandler.SetStateProvider(r.monitor)
		r.aiSettingsHandler.SetReadState(r.defaultReadState())
		// Inject the alert provider so AI has awareness of current alerts.
		// Also inject the alert resolver so AI Patrol can autonomously resolve alerts when issues are fixed.
		if alertManager := r.monitor.GetAlertManager(); alertManager != nil {
			alertAdapter := ai.NewAlertManagerAdapter(alertManager)
			r.aiSettingsHandler.SetAlertProvider(alertAdapter)
			r.aiSettingsHandler.SetAlertResolver(alertAdapter)
		}
		if incidentStore := r.monitor.GetIncidentStore(); incidentStore != nil {
			r.aiSettingsHandler.SetIncidentStore(incidentStore)
		}
	}
	// Inject the unified resource provider for AI context and routing.
	if provider := r.defaultUnifiedResourceProvider(); provider != nil {
		r.aiSettingsHandler.SetUnifiedResourceProvider(provider)
	} else {
		log.Warn().Msg("[Router] unified resource provider is nil, cannot inject unified resource provider")
	}
	// Inject the metadata provider for the AI URL discovery feature.
	// This allows AI to set resource URLs when it discovers web services.
	metadataProvider := NewMetadataProvider(
		guestMetadataHandler.Store(),
		dockerMetadataHandler.Store(),
		hostMetadataHandler.Store(),
	)
	r.aiSettingsHandler.SetMetadataProvider(metadataProvider)

	// AI chat handler
	r.aiHandler = NewAIHandler(r.multiTenant, r.mtMonitor, r.agentExecServer)
	r.aiHandler.SetReadState(r.defaultReadState())
	r.aiHandler.SetRecoveryManager(recoveryManager)
	r.aiHandler.SetServiceInitializer(func(ctx context.Context, service AIService) {
		r.wireAIChatDependenciesForService(ctx, service)
	})

	// AI-powered infrastructure discovery handlers
	// Note: the actual service is wired up later via SetDiscoveryService
	r.discoveryHandlers = NewDiscoveryHandlers(nil, r.config)

	// Wire the license checker for Pro feature gating (AI Patrol, Alert Analysis, Auto-Fix)
	r.aiSettingsHandler.SetLicenseHandlers(r.licenseHandlers)
	// Wire the model change callback to restart the AI chat service when the model is changed
	r.aiSettingsHandler.SetOnModelChange(func() {
		r.RestartAIChat(context.Background())
	})
	// Wire the control settings change callback to update MCP tool visibility
	r.aiSettingsHandler.SetOnControlSettingsChange(func() {
		if r.aiHandler != nil {
			ctx := context.Background()
			if svc := r.aiHandler.GetService(ctx); svc != nil {
				cfg := r.aiHandler.GetAIConfig(ctx)
				if cfg != nil {
					svc.UpdateControlSettings(cfg)
					log.Info().Str("control_level", cfg.GetControlLevel()).Msg("Updated AI control settings")
				}
			}
		}
	})
	// Wire the AI handler to the profile handler for AI-assisted suggestions
	r.configProfileHandler.SetAIHandler(r.aiHandler)
	// Wire the chat handler to the AI settings handler for investigation orchestration
	r.aiSettingsHandler.SetChatHandler(r.aiHandler)
	// Wire the license checker for alert manager Pro features (Update Alerts)
	if r.monitor != nil {
		alertMgr := r.monitor.GetAlertManager()
		if alertMgr != nil {
			licSvc := r.licenseHandlers.Service(context.Background())
			alertMgr.SetLicenseChecker(func(feature string) bool {
				return licSvc.HasFeature(feature)
			})
		}
	}

	// Initialize the recovery token store and capture the exact worker this router owns.
	r.recoveryTokenStore = ensureRecoveryTokenStore(r.config.DataPath)

	r.registerPublicAndAuthRoutes()
	r.registerMonitoringRoutes(guestMetadataHandler, dockerMetadataHandler, hostMetadataHandler, infraUpdateHandlers)
	r.registerConfigSystemRoutes(updateHandlers)
	r.registerAIRelayRoutes()
	r.registerOrgLicenseRoutes(orgHandlers, rbacHandlers, auditHandlers)
	r.registerHostedRoutes(hostedSignupHandlers, magicLinkHandlers, stripeWebhookHandlers)

	// Debug profiling endpoints (admin-only, can be disabled via PULSE_PPROF_DISABLED=true)
	if pprofEnabled() {
		r.registerDebugRoutes()
	}

	// Note: the frontend handler is handled manually in ServeHTTP to prevent redirect issues.
	// See issue #334 - ServeMux redirects the empty path to "./", which breaks reverse proxies.
}

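// Validation decision table for the agent-exec token callback registered in
// setupRoutes above (a summary of the closure's branches, not new behavior):
//
//	token not a valid API token               -> reject
//	valid, but missing the agent:exec scope   -> reject (warning logged)
//	valid, bound_agent_id set and != agentID  -> reject (warning logged)
//	valid, scope present, unbound or matching -> accept
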
// CleanupTenant removes all per-tenant resources (RBAC, AI, License) for a deleted org.
func (r *Router) CleanupTenant(ctx context.Context, orgID string) error {
	orgID = strings.TrimSpace(orgID)
	if orgID == "" || orgID == "default" {
		return nil
	}

	var errs []error

	r.StopPatrolForOrg(orgID)

	if r.aiSettingsHandler != nil {
		r.aiSettingsHandler.RemoveTenantService(orgID)
	}

	if r.rbacProvider != nil {
		if err := r.rbacProvider.RemoveTenant(orgID); err != nil {
			errs = append(errs, fmt.Errorf("rbac cleanup: %w", err))
		}
	}

	if r.aiHandler != nil {
		if err := r.aiHandler.RemoveTenantService(ctx, orgID); err != nil {
			errs = append(errs, fmt.Errorf("ai cleanup: %w", err))
		}
	}

	if r.licenseHandlers != nil {
		r.licenseHandlers.RemoveTenantService(orgID)
	}
	r.monitorAdapterMu.Lock()
	delete(r.monitorResourceAdapters, orgID)
	r.monitorAdapterMu.Unlock()

	if len(errs) > 0 {
		return errors.Join(errs...)
	}
	return nil
}

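// CleanupTenant keeps going after individual failures and reports them
// together via errors.Join. A minimal caller sketch (the org ID is illustrative):
//
//	if err := router.CleanupTenant(ctx, "org-123"); err != nil {
//		// err may wrap several causes, e.g. "rbac cleanup: ..." and
//		// "ai cleanup: ..."; errors.Is/errors.As still match each one.
//		log.Error().Err(err).Msg("tenant cleanup completed with errors")
//	}
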
// RemoveTenantService removes the cached license service for a deleted org.
func (h *LicenseHandlers) RemoveTenantService(orgID string) {
	h.services.Delete(orgID)
}

// routeAISessions routes session-specific AI chat requests
func (r *Router) routeAISessions(w http.ResponseWriter, req *http.Request) {
	// Extract session ID from path: /api/ai/sessions/{id}[/messages|/abort|/summarize|/diff|/fork|/revert|/unrevert]
	path := strings.TrimPrefix(req.URL.Path, "/api/ai/sessions/")
	parts := strings.SplitN(path, "/", 2)
	sessionID := parts[0]

	if sessionID == "" {
		http.Error(w, "Session ID required", http.StatusBadRequest)
		return
	}

	// Check if there's a sub-resource
	if len(parts) > 1 {
		switch parts[1] {
		case "messages":
			if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteSessionMessages) {
				return
			}
			r.aiHandler.HandleMessages(w, req, sessionID)
		case "abort":
			if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteSessionAbort) {
				return
			}
			r.aiHandler.HandleAbort(w, req, sessionID)
		case "summarize":
			if !ensureScope(w, req, config.ScopeAIChat) {
				return
			}
			r.aiHandler.HandleSummarize(w, req, sessionID)
		case "diff":
			if !ensureScope(w, req, config.ScopeAIChat) {
				return
			}
			r.aiHandler.HandleDiff(w, req, sessionID)
		case "fork":
			if !ensureScope(w, req, config.ScopeAIChat) {
				return
			}
			r.aiHandler.HandleFork(w, req, sessionID)
		case "revert":
			if !ensureScope(w, req, config.ScopeAIChat) {
				return
			}
			r.aiHandler.HandleRevert(w, req, sessionID)
		case "unrevert":
			if !ensureScope(w, req, config.ScopeAIChat) {
				return
			}
			r.aiHandler.HandleUnrevert(w, req, sessionID)
		default:
			if !ensureScope(w, req, config.ScopeAIChat) {
				return
			}
			http.Error(w, "Not found", http.StatusNotFound)
		}
		return
	}

	// Handle session-level operations
	switch req.Method {
	case http.MethodDelete:
		if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteSessionDelete) {
			return
		}
		r.aiHandler.HandleDeleteSession(w, req, sessionID)
	default:
		if !ensureScope(w, req, config.ScopeAIChat) {
			return
		}
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
	}
}

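// Path parsing sketch for routeAISessions (TrimPrefix plus SplitN with a
// limit of 2, so the sub-resource keeps any remaining slashes):
//
//	/api/ai/sessions/abc123          -> sessionID "abc123", no sub-resource
//	/api/ai/sessions/abc123/messages -> sessionID "abc123", parts[1] == "messages"
//	/api/ai/sessions/abc123/fork     -> sessionID "abc123", parts[1] == "fork"
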
func (r *Router) routeAISessionsCollection(w http.ResponseWriter, req *http.Request) {
	switch req.Method {
	case http.MethodGet:
		if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteSessionsList) {
			return
		}
		r.aiHandler.HandleSessions(w, req)
	case http.MethodPost:
		if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteSessionCreate) {
			return
		}
		r.aiHandler.HandleCreateSession(w, req)
	default:
		if !ensureScope(w, req, config.ScopeAIChat) {
			return
		}
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
	}
}

func (r *Router) routeAIPatrolFindings(w http.ResponseWriter, req *http.Request) {
	switch req.Method {
	case http.MethodGet:
		if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRoutePatrolFindingsList) {
			return
		}
		r.aiSettingsHandler.HandleGetPatrolFindings(w, req)
	case http.MethodDelete:
		if !ensureScope(w, req, config.ScopeAIExecute) {
			return
		}
		// Clear all findings - doesn't require a Pro license, so users can clean up accumulated findings
		r.aiSettingsHandler.HandleClearAllFindings(w, req)
	default:
		if !ensureScope(w, req, config.ScopeAIExecute) {
			return
		}
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
	}
}

func (r *Router) routeAIFindings(w http.ResponseWriter, req *http.Request) {
	path := req.URL.Path
	switch {
	case strings.HasSuffix(path, "/investigation/messages"):
		if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteFindingInvestigationMessages) {
			return
		}
		r.aiSettingsHandler.HandleGetInvestigationMessages(w, req)
	case strings.HasSuffix(path, "/investigation"):
		if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteFindingInvestigation) {
			return
		}
		r.aiSettingsHandler.HandleGetInvestigation(w, req)
	case strings.HasSuffix(path, "/reinvestigate"):
		if !ensureScope(w, req, config.ScopeAIExecute) {
			return
		}
		r.aiAutoFixEndpoints.HandleReinvestigateFinding(w, req)
	case strings.HasSuffix(path, "/reapprove"):
		if !ensureScope(w, req, config.ScopeAIExecute) {
			return
		}
		r.aiAutoFixEndpoints.HandleReapproveInvestigationFix(w, req)
	default:
		if !ensureScope(w, req, config.ScopeAIExecute) {
			return
		}
		http.Error(w, "Not found", http.StatusNotFound)
	}
}

// routeApprovals routes approval-specific requests
func (r *Router) routeApprovals(w http.ResponseWriter, req *http.Request) {
	// Extract approval ID and action from path: /api/ai/approvals/{id}[/approve|/deny]
	path := strings.TrimPrefix(req.URL.Path, "/api/ai/approvals/")
	parts := strings.SplitN(path, "/", 2)

	if parts[0] == "" {
		http.Error(w, "Approval ID required", http.StatusBadRequest)
		return
	}

	// Check if there's an action
	if len(parts) > 1 {
		switch parts[1] {
		case "approve":
			if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteApprovalApprove) {
				return
			}
			r.aiSettingsHandler.HandleApproveCommand(w, req)
		case "deny":
			if !ensureRelayMobileRuntimeRoute(w, req, relayMobileRouteApprovalDeny) {
				return
			}
			r.aiSettingsHandler.HandleDenyCommand(w, req)
		default:
			if !ensureScope(w, req, config.ScopeAIExecute) {
				return
			}
			http.Error(w, "Not found", http.StatusNotFound)
		}
		return
	}

	// Handle approval-level operations (GET specific approval)
	switch req.Method {
	case http.MethodGet:
		if !ensureScope(w, req, config.ScopeAIExecute) {
			return
		}
		r.aiSettingsHandler.HandleGetApproval(w, req)
	default:
		if !ensureScope(w, req, config.ScopeAIExecute) {
			return
		}
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
	}
}

// routeQuestions routes question-specific requests
func (r *Router) routeQuestions(w http.ResponseWriter, req *http.Request) {
	// Extract question ID and action from path: /api/ai/question/{id}/answer
	path := strings.TrimPrefix(req.URL.Path, "/api/ai/question/")
	parts := strings.SplitN(path, "/", 2)

	if parts[0] == "" {
		http.Error(w, "Question ID required", http.StatusBadRequest)
		return
	}

	questionID := parts[0]

	// Check if there's an action
	if len(parts) > 1 && parts[1] == "answer" {
		if req.Method == http.MethodPost {
			r.aiHandler.HandleAnswerQuestion(w, req, questionID)
		} else {
			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		}
		return
	}

	http.Error(w, "Not found", http.StatusNotFound)
}

// handleAgentWebSocket handles WebSocket connections from agents for AI command execution
func (r *Router) handleAgentWebSocket(w http.ResponseWriter, req *http.Request) {
	if r.agentExecServer == nil {
		http.Error(w, "Agent execution not available", http.StatusServiceUnavailable)
		return
	}
	r.agentExecServer.HandleWebSocket(w, req)
}

func (r *Router) handleVerifyTemperatureSSH(w http.ResponseWriter, req *http.Request) {
	if r.configHandlers == nil {
		http.Error(w, "Service unavailable", http.StatusServiceUnavailable)
		return
	}

	// Check setup token first (for setup scripts)
	if r.isValidSetupTokenForRequest(req) {
		r.configHandlers.HandleVerifyTemperatureSSH(w, req)
		return
	}

	// Require authentication
	if !CheckAuth(r.config, w, req) {
		log.Warn().
			Str("ip", req.RemoteAddr).
			Str("path", req.URL.Path).
			Str("method", req.Method).
			Msg("Unauthorized access attempt (verify-temperature-ssh)")

		if strings.HasPrefix(req.URL.Path, "/api/") || strings.Contains(req.Header.Get("Accept"), "application/json") {
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(http.StatusUnauthorized)
			w.Write([]byte(`{"error":"Authentication required"}`))
		} else {
			http.Error(w, "Unauthorized", http.StatusUnauthorized)
		}
		return
	}

	// Check admin privileges for proxy auth users
	if r.config.ProxyAuthSecret != "" {
		if valid, username, isAdmin := CheckProxyAuth(r.config, req); valid && !isAdmin {
			log.Warn().
				Str("ip", GetClientIP(req)).
				Str("username", username).
				Msg("Non-admin user attempted verify-temperature-ssh")
			http.Error(w, "Admin privileges required", http.StatusForbidden)
			return
		}
	}

	// Require admin session identity or settings:write token for privileged SSH probes.
	if !ensureSettingsWriteScope(r.config, w, req) {
		return
	}

	r.configHandlers.HandleVerifyTemperatureSSH(w, req)
}

// handleSSHConfig handles SSH config writes with setup token or API auth
func (r *Router) handleSSHConfig(w http.ResponseWriter, req *http.Request) {
	if r.systemSettingsHandler == nil {
		http.Error(w, "Service unavailable", http.StatusServiceUnavailable)
		return
	}

	// Check setup token first (for setup scripts)
	if r.isValidSetupTokenForRequest(req) {
		r.systemSettingsHandler.HandleSSHConfig(w, req)
		return
	}

	// Require authentication
	if !CheckAuth(r.config, w, req) {
		log.Warn().
			Str("ip", req.RemoteAddr).
			Str("path", req.URL.Path).
			Str("method", req.Method).
			Msg("Unauthorized access attempt (ssh-config)")

		if strings.HasPrefix(req.URL.Path, "/api/") || strings.Contains(req.Header.Get("Accept"), "application/json") {
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(http.StatusUnauthorized)
			w.Write([]byte(`{"error":"Authentication required"}`))
		} else {
			http.Error(w, "Unauthorized", http.StatusUnauthorized)
		}
		return
	}

	// Check admin privileges for proxy auth users
	if r.config.ProxyAuthSecret != "" {
		if valid, username, isAdmin := CheckProxyAuth(r.config, req); valid && !isAdmin {
			log.Warn().
				Str("ip", GetClientIP(req)).
				Str("username", username).
				Msg("Non-admin user attempted ssh-config update")
			http.Error(w, "Admin privileges required", http.StatusForbidden)
			return
		}
	}

	// Require admin session identity or settings:write token for privileged SSH config writes.
	if !ensureSettingsWriteScope(r.config, w, req) {
		return
	}

	r.systemSettingsHandler.HandleSSHConfig(w, req)
}

func extractSetupToken(req *http.Request) string {
	if token := strings.TrimSpace(req.Header.Get("X-Setup-Token")); token != "" {
		return token
	}
	if token := extractBearerToken(req.Header.Get("Authorization")); token != "" {
		return token
	}
	if token := strings.TrimSpace(req.URL.Query().Get("auth_token")); token != "" {
		return token
	}
	return ""
}

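// Lookup precedence in extractSetupToken; the first non-empty source wins
// (token values are illustrative):
//
//	X-Setup-Token: abc        -> "abc" (wins even if the others are set)
//	Authorization: Bearer xyz -> "xyz" when no X-Setup-Token is present
//	?auth_token=qrs           -> "qrs" as the final fallback
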
func (r *Router) isValidSetupTokenForRequest(req *http.Request) bool {
	if r == nil || r.configHandlers == nil || req == nil {
		return false
	}

	token := extractSetupToken(req)
	if token == "" {
		return false
	}

	requestOrgID := resolveTenantOrgID(req)
	if !isValidOrganizationID(requestOrgID) {
		return false
	}

	return r.configHandlers.ValidateSetupTokenForOrg(token, requestOrgID)
}

func extractBearerToken(header string) string {
	if header == "" {
		return ""
	}

	trimmed := strings.TrimSpace(header)
	if len(trimmed) < 7 {
		return ""
	}

	if strings.HasPrefix(strings.ToLower(trimmed), "bearer ") {
		return strings.TrimSpace(trimmed[7:])
	}

	return ""
}

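// Parsing sketch for extractBearerToken (the scheme match is case-insensitive
// and surrounding whitespace is trimmed):
//
//	extractBearerToken("Bearer abc123")   == "abc123"
//	extractBearerToken("bearer   abc123") == "abc123"
//	extractBearerToken("Basic abc123")    == ""
//	extractBearerToken("Bearer")          == "" (shorter than "bearer " plus a token)
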
// Handler returns the router wrapped with middleware.
func (r *Router) Handler() http.Handler {
	if r.wrapped != nil {
		return r.wrapped
	}
	return r
}

// SetMonitor updates the router and associated handlers with a new monitor instance.
func (r *Router) SetMonitor(m *monitoring.Monitor) {
	r.monitor = m
	if r.alertHandlers != nil {
		r.alertHandlers.SetMonitor(NewAlertMonitorWrapper(m))
	}
	if r.configHandlers != nil {
		r.configHandlers.SetMonitor(m)
	}
	if r.notificationHandlers != nil {
		r.notificationHandlers.SetMonitor(NewNotificationMonitorWrapper(m))
		r.notificationHandlers.SetReadState(r.defaultReadState())
	}
	if r.dockerAgentHandlers != nil {
		r.dockerAgentHandlers.SetMonitor(m)
	}
	if r.unifiedAgentHandlers != nil {
		r.unifiedAgentHandlers.SetMonitor(m)
	}
	if r.systemSettingsHandler != nil {
		r.systemSettingsHandler.SetMonitor(m)
	}
	if m != nil {
		if url := strings.TrimSpace(r.config.PublicURL); url != "" {
			if mgr := m.GetNotificationManager(); mgr != nil {
				mgr.SetPublicURL(url)
			}
		}
		r.configureMonitorDependencies(m)
		if r.resourceHandlers != nil {
			r.resourceHandlers.SetStateProvider(m)
		}

		// Set state provider on AI handler so patrol service gets created
		// (Critical: patrol service is created lazily in SetStateProvider)
		if r.aiSettingsHandler != nil {
			r.aiSettingsHandler.SetStateProvider(m)
			r.aiSettingsHandler.SetReadState(r.defaultReadState())
			r.aiSettingsHandler.SetUnifiedResourceProvider(r.defaultUnifiedResourceProvider())
			// Also inject alert provider and resolver now that the monitor is available
			if alertManager := m.GetAlertManager(); alertManager != nil {
				alertAdapter := ai.NewAlertManagerAdapter(alertManager)
				r.aiSettingsHandler.SetAlertProvider(alertAdapter)
				r.aiSettingsHandler.SetAlertResolver(alertAdapter)
			}
			if incidentStore := m.GetIncidentStore(); incidentStore != nil {
				r.aiSettingsHandler.SetIncidentStore(incidentStore)
			}
		}
		if r.aiHandler != nil {
			r.aiHandler.SetReadState(r.defaultReadState())
		}

		// Set up Docker detector for automatic Docker detection in LXC containers
		if r.agentExecServer != nil {
			// Create a command executor function that wraps the agent exec server
			execFunc := func(ctx context.Context, hostname string, command string, timeout int) (string, int, error) {
				agentID, found := r.agentExecServer.GetAgentForHost(hostname)
				if !found {
					return "", -1, fmt.Errorf("no agent connected for host %s", hostname)
				}
				result, err := r.agentExecServer.ExecuteCommand(ctx, agentID, agentexec.ExecuteCommandPayload{
					RequestID: fmt.Sprintf("docker-check-%d", time.Now().UnixNano()),
					Command:   command,
					Timeout:   timeout,
				})
				if err != nil {
					return "", -1, err
				}
				return result.Stdout + result.Stderr, result.ExitCode, nil
			}

			checker := monitoring.NewAgentDockerChecker(execFunc)
			m.SetDockerChecker(checker)
			log.Info().Msg("[Router] Docker detector configured for automatic LXC Docker detection")
		}
	}
}

func (r *Router) defaultReadState() unifiedresources.ReadState {
	if r == nil {
		return nil
	}

	if r.monitor != nil {
		if readState := r.monitor.GetUnifiedReadState(); readState != nil {
			return readState
		}
	}
	if r.monitorResourceAdapter != nil {
		return r.monitorResourceAdapter
	}
	return r.resourceRegistry
}

func (r *Router) unifiedResourceProviderForMonitor(m *monitoring.Monitor) ai.UnifiedResourceProvider {
	if r == nil {
		return nil
	}

	if m != nil {
		if readState := m.GetUnifiedReadState(); readState != nil {
			if provider, ok := readState.(ai.UnifiedResourceProvider); ok && provider != nil {
				return provider
			}
		}
	}

	if adapter := r.monitorAdapterForMonitor(m); adapter != nil {
		return adapter
	}
	if r.monitorResourceAdapter != nil {
		return r.monitorResourceAdapter
	}
	return nil
}

func (r *Router) defaultUnifiedResourceProvider() ai.UnifiedResourceProvider {
	if r == nil {
		return nil
	}

	if r.monitor != nil {
		if provider := r.unifiedResourceProviderForMonitor(r.monitor); provider != nil {
			return provider
		}
	}
	return r.unifiedResourceProviderForMonitor(nil)
}

func (r *Router) monitorAdapterForMonitor(m *monitoring.Monitor) *unifiedresources.MonitorAdapter {
	if r == nil || m == nil {
		return nil
	}

	orgID := strings.TrimSpace(m.GetOrgID())
	if orgID == "" || orgID == "default" {
		return r.monitorResourceAdapter
	}

	r.monitorAdapterMu.Lock()
	defer r.monitorAdapterMu.Unlock()

	if r.monitorResourceAdapters == nil {
		r.monitorResourceAdapters = make(map[string]*unifiedresources.MonitorAdapter)
	}
	if existing := r.monitorResourceAdapters[orgID]; existing != nil {
		return existing
	}

	store := unifiedresources.ResourceStore(nil)
	if r.resourceHandlers != nil {
		if resolved, err := r.resourceHandlers.getStore(orgID); err == nil {
			store = resolved
		}
	}
	adapter := unifiedresources.NewMonitorAdapter(unifiedresources.NewRegistry(store))
	r.monitorResourceAdapters[orgID] = adapter
	return adapter
}

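// monitorAdapterForMonitor memoizes one adapter per non-default org under
// monitorAdapterMu, so repeated calls are cheap and return the same instance.
// A minimal sketch (the org monitor is illustrative):
//
//	a1 := r.monitorAdapterForMonitor(orgMonitor) // creates and caches the adapter
//	a2 := r.monitorAdapterForMonitor(orgMonitor) // cache hit: a1 == a2
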
func (r *Router) configureMonitorDependencies(m *monitoring.Monitor) {
	if r == nil || m == nil {
		return
	}

	if adapter := r.monitorAdapterForMonitor(m); adapter != nil {
		log.Debug().Msg("[Router] Injecting unified resource adapter into monitor")
		m.SetResourceStore(adapter)
	}

	if len(r.monitorSupplementalRecords) == 0 {
		return
	}

	keys := make([]string, 0, len(r.monitorSupplementalRecords))
	for source := range r.monitorSupplementalRecords {
		keys = append(keys, string(source))
	}
	sort.Strings(keys)

	for _, key := range keys {
		source := unifiedresources.DataSource(key)
		provider := r.monitorSupplementalRecords[source]
		m.SetSupplementalRecordsProvider(source, provider)
	}
}

func (r *Router) setMonitorSupplementalRecordsProvider(source unifiedresources.DataSource, provider monitoring.MonitorSupplementalRecordsProvider) {
	if r == nil {
		return
	}

	normalized := unifiedresources.DataSource(strings.ToLower(strings.TrimSpace(string(source))))
	if normalized == "" {
		return
	}

	if r.monitorSupplementalRecords == nil {
		r.monitorSupplementalRecords = make(map[unifiedresources.DataSource]monitoring.MonitorSupplementalRecordsProvider)
	}
	if provider == nil {
		delete(r.monitorSupplementalRecords, normalized)
	} else {
		r.monitorSupplementalRecords[normalized] = provider
	}

	if r.monitor != nil {
		r.monitor.SetSupplementalRecordsProvider(normalized, provider)
	}
	if r.mtMonitor != nil {
		r.mtMonitor.SetMonitorInitializer(r.configureMonitorDependencies)
	}
}

func (r *Router) syncPlatformSupplementalProviders(mockEnabled bool) {
	if r == nil {
		return
	}

	if mockEnabled {
		truenas.SetFeatureEnabled(true)
		vmware.SetFeatureEnabled(true)

		trueNASAdapter := mockSupplementalRecordsAdapter{source: unifiedresources.SourceTrueNAS}
		vmwareAdapter := mockSupplementalRecordsAdapter{source: unifiedresources.SourceVMware}

		if r.resourceHandlers != nil {
			r.resourceHandlers.SetSupplementalRecordsProvider(unifiedresources.SourceTrueNAS, trueNASAdapter)
			r.resourceHandlers.SetSupplementalRecordsProvider(unifiedresources.SourceVMware, vmwareAdapter)
		}
		r.setMonitorSupplementalRecordsProvider(unifiedresources.SourceTrueNAS, trueNASAdapter)
		r.setMonitorSupplementalRecordsProvider(unifiedresources.SourceVMware, vmwareAdapter)
		return
	}

	truenas.ResetFeatureEnabledFromEnv()
	vmware.ResetFeatureEnabledFromEnv()

	if r.resourceHandlers != nil {
		r.resourceHandlers.SetSupplementalRecordsProvider(unifiedresources.SourceTrueNAS, r.trueNASPoller)
		r.resourceHandlers.SetSupplementalRecordsProvider(unifiedresources.SourceVMware, r.vmwarePoller)
	}
	r.setMonitorSupplementalRecordsProvider(unifiedresources.SourceTrueNAS, r.trueNASPoller)
	r.setMonitorSupplementalRecordsProvider(unifiedresources.SourceVMware, r.vmwarePoller)
}

// getTenantMonitor returns the appropriate monitor for the current request's tenant.
// For non-default orgs, it fails closed when tenant monitor resolution fails.
func (r *Router) getTenantMonitor(ctx context.Context) *monitoring.Monitor {
	orgID := GetOrgID(ctx)

	// Default/legacy path remains backward compatible.
	if orgID == "" || orgID == "default" {
		return r.monitor
	}

	// Security: non-default orgs must never use the default monitor.
	if r.mtMonitor == nil {
		log.Warn().
			Str("org_id", orgID).
			Msg("Tenant monitor unavailable for non-default org request")
		return nil
	}

	monitor, err := r.mtMonitor.GetMonitor(orgID)
	if err != nil || monitor == nil {
		log.Warn().
			Err(err).
			Str("org_id", orgID).
			Msg("Failed to resolve tenant monitor for non-default org request")
		return nil
	}

	r.StartPatrolForOrg(ctx, orgID)

	return monitor
}

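// Fail-closed resolution table for getTenantMonitor:
//
//	org ID empty or "default"         -> r.monitor (legacy path)
//	non-default org, mtMonitor is nil -> nil (caller must reject the request)
//	non-default org, lookup fails     -> nil (caller must reject the request)
//	non-default org, lookup succeeds  -> tenant monitor (patrol started once per org)
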
// SetConfig refreshes the configuration reference used by the router and dependent handlers.
func (r *Router) SetConfig(cfg *config.Config) {
	if cfg == nil {
		return
	}

	config.Mu.Lock()
	defer config.Mu.Unlock()

	if r.config == nil {
		r.config = cfg
	} else {
		*r.config = *cfg
	}

	if r.configHandlers != nil {
		r.configHandlers.SetConfig(r.config)
	}
	if r.systemSettingsHandler != nil {
		r.systemSettingsHandler.SetConfig(r.config)
	}
	if r.aiSettingsHandler != nil {
		r.aiSettingsHandler.SetConfig(r.config)
	}
	if r.licenseHandlers != nil {
		r.licenseHandlers.SetConfig(r.config)
	}
}

// SetDiscoveryService sets the discovery service for the router.
func (r *Router) SetDiscoveryService(svc *servicediscovery.Service) {
	if r.discoveryHandlers != nil {
		r.discoveryHandlers.SetService(svc)
	}

	// Wire up the WebSocket hub for progress broadcasting
	if svc != nil && r.wsHub != nil {
		svc.SetWSHub(&wsHubAdapter{hub: r.wsHub})
		log.Info().Msg("Discovery: WebSocket hub wired for progress broadcasting")
	}
}

// SetDiscoveryAIConfigProvider sets the AI config provider for showing AI provider info in discovery.
func (r *Router) SetDiscoveryAIConfigProvider(provider AIConfigProvider) {
	if r.discoveryHandlers != nil {
		r.discoveryHandlers.SetAIConfigProvider(provider)
	}
}

// wsHubAdapter adapts websocket.Hub to the servicediscovery.WSBroadcaster interface.
type wsHubAdapter struct {
	hub *websocket.Hub
}

// BroadcastDiscoveryProgress broadcasts discovery progress to all WebSocket clients.
func (a *wsHubAdapter) BroadcastDiscoveryProgress(progress *servicediscovery.DiscoveryProgress) {
	if a.hub == nil || progress == nil {
		return
	}
	a.hub.BroadcastMessage(websocket.Message{
		Type: "ai_discovery_progress",
		Data: progress,
	})
}

func normalizePatrolOrgID(orgID string) string {
	orgID = strings.TrimSpace(orgID)
	if orgID == "" {
		return "default"
	}
	return orgID
}

func (r *Router) markPatrolStarted(orgID string) bool {
	if r == nil {
		return false
	}
	orgID = normalizePatrolOrgID(orgID)
	r.patrolLifecycleMu.Lock()
	defer r.patrolLifecycleMu.Unlock()
	if r.startedPatrolOrgs == nil {
		r.startedPatrolOrgs = make(map[string]bool)
	}
	if r.startedPatrolOrgs[orgID] {
		return false
	}
	r.startedPatrolOrgs[orgID] = true
	return true
}

func (r *Router) clearPatrolStarted(orgID string) {
	if r == nil {
		return
	}
	orgID = normalizePatrolOrgID(orgID)
	r.patrolLifecycleMu.Lock()
	delete(r.startedPatrolOrgs, orgID)
	r.patrolLifecycleMu.Unlock()
}

func (r *Router) patrolCtx(ctx context.Context, orgID string) context.Context {
	orgID = normalizePatrolOrgID(orgID)
	if ctx == nil {
		return context.WithValue(context.Background(), OrgIDContextKey, orgID)
	}
	if existing := strings.TrimSpace(GetOrgID(ctx)); existing != "" {
		return ctx
	}
	return context.WithValue(ctx, OrgIDContextKey, orgID)
}

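// Context stamping sketch for patrolCtx: an org ID already present in the
// context wins over the argument (org IDs are illustrative):
//
//	patrolCtx(context.Background(), "org-a") // context now carries "org-a"
//	patrolCtx(ctxCarryingOrgB, "org-a")      // unchanged; keeps "org-b"
//	patrolCtx(nil, "")                       // fresh context carrying "default"
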
// StartPatrol starts the AI patrol service for background infrastructure monitoring
func (r *Router) StartPatrol(ctx context.Context) {
	orgID := normalizePatrolOrgID(GetOrgID(ctx))
	r.StartPatrolForOrg(ctx, orgID)
}

// StartPatrolForOrg starts the patrol/intelligence lifecycle for a specific org exactly once.
func (r *Router) StartPatrolForOrg(ctx context.Context, orgID string) {
	orgID = normalizePatrolOrgID(orgID)
	if !r.markPatrolStarted(orgID) {
		return
	}
	orgCtx := r.patrolCtx(ctx, orgID)
	if !r.startPatrolForContext(orgCtx, orgID) {
		r.clearPatrolStarted(orgID)
	}
}

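// Start-once semantics: markPatrolStarted flips the per-org flag before any
// work begins, and StartPatrolForOrg clears it again when startup fails so a
// later call can retry. A minimal sketch (the org ID is illustrative):
//
//	r.StartPatrolForOrg(ctx, "org-a") // attempts startup, sets the flag
//	r.StartPatrolForOrg(ctx, "org-a") // no-op if the first attempt succeeded
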
func (r *Router) startPatrolForContext(ctx context.Context, orgID string) bool {
	if r == nil || r.aiSettingsHandler == nil {
		return false
	}
	ctx = r.patrolCtx(ctx, orgID)
	aiService := r.aiSettingsHandler.GetAIService(ctx)
	if aiService == nil || !aiService.IsEnabled() {
		return false
	}
	if orgID != "default" && aiService.GetOrgID() != orgID {
		log.Warn().
			Str("org_id", orgID).
			Str("service_org_id", aiService.GetOrgID()).
			Msg("Patrol start aborted: AI service org scope mismatch")
		return false
	}
	monitor := r.getTenantMonitor(ctx)
	if orgID != "default" && monitor == nil {
		log.Warn().
			Str("org_id", orgID).
			Msg("Patrol start aborted: tenant monitor unavailable")
		return false
	}
	persistence := r.persistenceForOrg(ctx)

	// Connect patrol to user-configured alert thresholds so it warns before alerts fire
	if monitor != nil {
		if alertManager := monitor.GetAlertManager(); alertManager != nil {
			thresholdAdapter := ai.NewAlertThresholdAdapter(alertManager)
			aiService.SetPatrolThresholdProvider(thresholdAdapter)
		}
		if incidentStore := monitor.GetIncidentStore(); incidentStore != nil {
			aiService.SetIncidentStore(incidentStore)
			r.aiSettingsHandler.SetIncidentStoreForOrg(orgID, incidentStore)
		}
	}

	// Enable findings persistence (load from disk, auto-save on changes)
	if persistence != nil {
		findingsPersistence := ai.NewFindingsPersistenceAdapter(persistence)
		historyPersistence := ai.NewPatrolHistoryPersistenceAdapter(persistence)
		if patrol := aiService.GetPatrolService(); patrol != nil {
			if err := patrol.SetFindingsPersistence(findingsPersistence); err != nil {
				log.Error().Err(err).Msg("Failed to initialize AI findings persistence")
			}
			// Enable patrol run history persistence
			if err := patrol.SetRunHistoryPersistence(historyPersistence); err != nil {
				log.Error().Err(err).Msg("Failed to initialize AI patrol run history persistence")
			}
		}
	}

	// Connect patrol to metrics history for enriched context (trends, predictions)
	if monitor != nil {
		if metricsHistory := monitor.GetMetricsHistory(); metricsHistory != nil {
			adapter := ai.NewMetricsHistoryAdapter(metricsHistory)
			if adapter != nil {
				aiService.SetMetricsHistoryProvider(adapter)
			}

			// Initialize baseline store for anomaly detection.
			baselineCfg := ai.DefaultBaselineConfig()
			if persistence != nil {
				baselineCfg.DataDir = persistence.DataDir()
			}
			baselineStore := ai.NewBaselineStore(baselineCfg)
			if baselineStore != nil {
				aiService.SetBaselineStore(baselineStore)

				// Start background baseline learning loop
				go r.startBaselineLearning(ctx, baselineStore, metricsHistory)
			}
		}
	}

	// Initialize operational memory (change detection and remediation logging)
	dataDir := ""
	if persistence != nil {
		dataDir = persistence.DataDir()
	}

	changeDetector := ai.NewChangeDetector(ai.ChangeDetectorConfig{
		MaxChanges: 1000,
		DataDir:    dataDir,
	})
	if changeDetector != nil {
		aiService.SetChangeDetector(changeDetector)
	}

	remediationLog := ai.NewRemediationLog(ai.RemediationLogConfig{
		MaxRecords: 500,
		DataDir:    dataDir,
	})
	if remediationLog != nil {
		aiService.SetRemediationLog(remediationLog)
	}

	// Initialize pattern and correlation detectors for AI-enabled orgs.
	if aiService.IsEnabled() {
		// Initialize pattern detector for failure prediction
		patternDetector := ai.NewPatternDetector(ai.PatternDetectorConfig{
			MaxEvents:       5000,
			MinOccurrences:  3,
			PatternWindow:   90 * 24 * time.Hour,
			PredictionLimit: 30 * 24 * time.Hour,
			DataDir:         dataDir,
		})
		if patternDetector != nil {
			aiService.SetPatternDetector(patternDetector)

			// Wire alert history to the pattern detector for event tracking
			if monitor != nil {
				if alertManager := monitor.GetAlertManager(); alertManager != nil {
					alertManager.OnAlertHistory(func(alert alerts.Alert) {
						// Convert alert type to a trackable event
						patternDetector.RecordFromAlert(alert.ResourceID, alert.Type+"_"+string(alert.Level), alert.StartTime)
					})
					log.Info().Msg("AI Pattern Detector: Wired to alert history for failure prediction")
				}
			}
		}

		// Initialize correlation detector for multi-resource relationships
		correlationDetector := ai.NewCorrelationDetector(ai.CorrelationConfig{
			MaxEvents:         10000,
			CorrelationWindow: 10 * time.Minute,
			MinOccurrences:    3,
			RetentionWindow:   30 * 24 * time.Hour,
			DataDir:           dataDir,
		})
		if correlationDetector != nil {
			aiService.SetCorrelationDetector(correlationDetector)

			// Wire alert history to the correlation detector
			if monitor != nil {
				if alertManager := monitor.GetAlertManager(); alertManager != nil {
					alertManager.OnAlertHistory(func(alert alerts.Alert) {
						// Record as a correlation event
						eventType := ai.CorrelationEventType(ai.CorrelationEventAlert)
						switch alert.Type {
						case "cpu":
							eventType = ai.CorrelationEventHighCPU
						case "memory":
							eventType = ai.CorrelationEventHighMem
						case "disk":
							eventType = ai.CorrelationEventDiskFull
						case "offline", "connectivity":
							eventType = ai.CorrelationEventOffline
						}
						correlationDetector.RecordEvent(ai.CorrelationEvent{
							ResourceID:   alert.ResourceID,
							ResourceName: alert.ResourceName,
							ResourceType: alert.Type,
							EventType:    eventType,
							Timestamp:    alert.StartTime,
							Value:        alert.Value,
						})
					})
					log.Info().Msg("AI Correlation Detector: Wired to alert history for multi-resource analysis")
				}
			}
		}
	}

	// Initialize new AI intelligence services (Phase 6)
	r.initializeAIIntelligenceServices(ctx, orgID, dataDir, monitor)

	// Wire the unified finding callback AFTER initializeAIIntelligenceServices
	// (the unified store is created there) and AFTER findings persistence is loaded
	patrol := aiService.GetPatrolService()
	if patrol != nil {
		if unifiedStore := r.aiSettingsHandler.GetUnifiedStoreForOrg(orgID); unifiedStore != nil {
			toUnifiedLifecycle := func(events []ai.FindingLifecycleEvent) []unified.UnifiedFindingLifecycleEvent {
				if len(events) == 0 {
					return nil
				}
				out := make([]unified.UnifiedFindingLifecycleEvent, 0, len(events))
				for _, e := range events {
					out = append(out, unified.UnifiedFindingLifecycleEvent{
						At:       e.At,
						Type:     e.Type,
						Message:  e.Message,
						From:     e.From,
						To:       e.To,
						Metadata: e.Metadata,
					})
				}
				return out
			}
			patrol.SetUnifiedFindingCallback(func(f *ai.Finding) bool {
				// Convert ai.Finding to unified.UnifiedFinding
				uf := &unified.UnifiedFinding{
					ID:       f.ID,
					Source:   unified.SourceAIPatrol,
					Severity: unified.UnifiedSeverity(f.Severity),
					Category: unified.UnifiedCategory(f.Category),
|
||
ResourceID: f.ResourceID,
|
||
ResourceName: f.ResourceName,
|
||
ResourceType: f.ResourceType,
|
||
Node: f.Node,
|
||
Title: f.Title,
|
||
Description: f.Description,
|
||
Recommendation: f.Recommendation,
|
||
Evidence: f.Evidence,
|
||
DetectedAt: f.DetectedAt,
|
||
LastSeenAt: f.LastSeenAt,
|
||
ResolvedAt: f.ResolvedAt,
|
||
InvestigationSessionID: f.InvestigationSessionID,
|
||
InvestigationStatus: f.InvestigationStatus,
|
||
InvestigationOutcome: f.InvestigationOutcome,
|
||
LastInvestigatedAt: f.LastInvestigatedAt,
|
||
InvestigationAttempts: f.InvestigationAttempts,
|
||
LoopState: f.LoopState,
|
||
Lifecycle: toUnifiedLifecycle(f.Lifecycle),
|
||
RegressionCount: f.RegressionCount,
|
||
LastRegressionAt: f.LastRegressionAt,
|
||
AcknowledgedAt: f.AcknowledgedAt,
|
||
SnoozedUntil: f.SnoozedUntil,
|
||
DismissedReason: f.DismissedReason,
|
||
UserNote: f.UserNote,
|
||
Suppressed: f.Suppressed,
|
||
TimesRaised: f.TimesRaised,
|
||
}
|
||
_, isNew := unifiedStore.AddFromAI(uf)
|
||
return isNew
|
||
})
|
||
patrol.SetUnifiedFindingResolver(func(findingID string) {
|
||
unifiedStore.Resolve(findingID)
|
||
})
|
||
|
||
// Wire push notifications: patrol findings → relay client (best-effort)
|
||
patrol.SetPushNotifyCallback(func(n relay.PushNotificationPayload) {
|
||
r.relayMu.RLock()
|
||
client := r.relayClient
|
||
r.relayMu.RUnlock()
|
||
if client != nil {
|
||
if err := client.SendPushNotification(n); err != nil {
|
||
log.Debug().Err(err).Str("type", n.Type).Msg("Push notification send failed")
|
||
}
|
||
}
|
||
})
|
||
|
||
log.Info().Msg("AI Intelligence: Patrol findings wired to unified store")
|
||
|
||
// Sync existing findings from persistence to the unified store
|
||
// (findings loaded from disk before the callback was set)
|
||
existingFindings := patrol.GetFindingsHistory(nil)
|
||
if len(existingFindings) > 0 {
|
||
for _, f := range existingFindings {
|
||
if f == nil {
|
||
continue
|
||
}
|
||
uf := &unified.UnifiedFinding{
|
||
ID: f.ID,
|
||
Source: unified.SourceAIPatrol,
|
||
Severity: unified.UnifiedSeverity(f.Severity),
|
||
Category: unified.UnifiedCategory(f.Category),
|
||
ResourceID: f.ResourceID,
|
||
ResourceName: f.ResourceName,
|
||
ResourceType: f.ResourceType,
|
||
Node: f.Node,
|
||
Title: f.Title,
|
||
Description: f.Description,
|
||
Recommendation: f.Recommendation,
|
||
Evidence: f.Evidence,
|
||
DetectedAt: f.DetectedAt,
|
||
LastSeenAt: f.LastSeenAt,
|
||
ResolvedAt: f.ResolvedAt,
|
||
InvestigationSessionID: f.InvestigationSessionID,
|
||
InvestigationStatus: f.InvestigationStatus,
|
||
InvestigationOutcome: f.InvestigationOutcome,
|
||
LastInvestigatedAt: f.LastInvestigatedAt,
|
||
InvestigationAttempts: f.InvestigationAttempts,
|
||
LoopState: f.LoopState,
|
||
Lifecycle: toUnifiedLifecycle(f.Lifecycle),
|
||
RegressionCount: f.RegressionCount,
|
||
LastRegressionAt: f.LastRegressionAt,
|
||
AcknowledgedAt: f.AcknowledgedAt,
|
||
SnoozedUntil: f.SnoozedUntil,
|
||
DismissedReason: f.DismissedReason,
|
||
UserNote: f.UserNote,
|
||
Suppressed: f.Suppressed,
|
||
TimesRaised: f.TimesRaised,
|
||
}
|
||
// Copy resolution timestamp if resolved
|
||
if f.ResolvedAt != nil || f.AutoResolved {
|
||
now := time.Now()
|
||
if f.ResolvedAt != nil {
|
||
uf.ResolvedAt = f.ResolvedAt
|
||
} else {
|
||
uf.ResolvedAt = &now
|
||
}
|
||
}
|
||
unifiedStore.AddFromAI(uf)
|
||
}
|
||
log.Info().Int("count", len(existingFindings)).Msg("AI Intelligence: Synced existing patrol findings to unified store")
|
||
}
|
||
|
||
// Wire unified store for "Discuss with Assistant" finding context lookup
|
||
r.aiHandler.SetUnifiedStoreForOrg(orgID, unifiedStore)
|
||
}
|
||
}
|
||
|
||
// Finally start the actual patrol loop
|
||
r.aiSettingsHandler.StartPatrol(ctx)
|
||
|
||
// Wire up discovery service to the handlers
|
||
// This enables the /api/discovery endpoints to trigger discovery scans
|
||
aiService = r.aiSettingsHandler.GetAIService(ctx)
|
||
if aiService != nil {
|
||
if discoveryService := aiService.GetDiscoveryService(); discoveryService != nil {
|
||
r.SetDiscoveryService(discoveryService)
|
||
log.Info().Msg("Discovery: Service wired to API handlers")
|
||
}
|
||
// Wire up AI config provider for showing AI provider info in discovery UI
|
||
r.SetDiscoveryAIConfigProvider(aiService)
|
||
}
|
||
|
||
return true
|
||
}
|
||
|
||
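// Illustrative start sequence (a sketch, not a prescribed entry point;
// startPatrolForContext itself derives the org-scoped context via r.patrolCtx):
//
//	if r.startPatrolForContext(context.Background(), "default") {
//		// Patrol loop, trigger manager, and unified-store wiring are now live.
//	}
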
// initializeAIIntelligenceServices sets up the new AI intelligence subsystems.
func (r *Router) initializeAIIntelligenceServices(ctx context.Context, orgID, dataDir string, monitor *monitoring.Monitor) {
	aiService := r.aiSettingsHandler.GetAIService(ctx)
	if aiService == nil || !aiService.IsEnabled() {
		return
	}
	if orgID != "default" && aiService.GetOrgID() != orgID {
		log.Warn().
			Str("org_id", orgID).
			Str("service_org_id", aiService.GetOrgID()).
			Msg("AI intelligence initialization skipped: AI service org scope mismatch")
		return
	}

	// 1. Initialize circuit breaker for resilient patrol.
	circuitBreaker := circuit.NewBreaker("patrol", circuit.DefaultConfig())
	r.aiSettingsHandler.SetCircuitBreakerForOrg(orgID, circuitBreaker)
	log.Info().Msg("AI Intelligence: Circuit breaker initialized")

	// 2. Initialize learning store for feedback learning.
	learningCfg := learning.LearningStoreConfig{
		DataDir: dataDir,
	}
	learningStore := learning.NewLearningStore(learningCfg)
	r.aiSettingsHandler.SetLearningStoreForOrg(orgID, learningStore)
	log.Info().Msg("AI Intelligence: Learning store initialized")

	// 4. Initialize forecast service for trend forecasting.
	forecastCfg := forecast.DefaultForecastConfig()
	forecastService := forecast.NewService(forecastCfg)
	// Wire up data provider adapter.
	if monitor != nil {
		if metricsHistory := monitor.GetMetricsHistory(); metricsHistory != nil {
			dataAdapter := adapters.NewForecastDataAdapter(metricsHistory)
			if dataAdapter != nil {
				forecastService.SetDataProvider(dataAdapter)
			}
		}
	}
	// Wire up resource iterator for forecast context (via ReadState).
	if monitor != nil {
		if rs := monitor.GetUnifiedReadState(); rs != nil {
			forecastService.SetResourceIterator(&forecastResourceIterator{readState: rs})
		} else {
			log.Warn().Msg("AI Intelligence: Forecast resource iterator not wired — ReadState unavailable")
		}
	}
	r.aiSettingsHandler.SetForecastServiceForOrg(orgID, forecastService)
	log.Info().Msg("AI Intelligence: Forecast service initialized")

	// 5. Initialize Proxmox event correlator.
	proxmoxCfg := proxmox.DefaultEventCorrelatorConfig()
	proxmoxCfg.DataDir = dataDir
	proxmoxCorrelator := proxmox.NewEventCorrelator(proxmoxCfg)
	r.aiSettingsHandler.SetProxmoxCorrelatorForOrg(orgID, proxmoxCorrelator)
	log.Info().Msg("AI Intelligence: Proxmox event correlator initialized")

	// 7. Initialize remediation engine for AI-guided fixes (requires Pulse Pro).
	var remediationEngine aicontracts.RemediationEngine
	if isAIInvestigationEnabled() {
		remediationCfg := aicontracts.DefaultEngineConfig()
		remediationCfg.DataDir = dataDir
		if factory := getCreateRemediationEngine(); factory != nil {
			remediationEngine = factory(remediationCfg)
		}
		if remediationEngine != nil {
			// Wire up command executor (disabled by default for safety).
			cmdExecutor := adapters.NewCommandExecutorAdapter()
			remediationEngine.SetCommandExecutor(cmdExecutor)
			r.aiSettingsHandler.SetRemediationEngineForOrg(orgID, remediationEngine)
			log.Info().Msg("AI Intelligence: Remediation engine initialized (command execution disabled)")
		} else {
			// Clear any stale engine from a prior init cycle.
			r.aiSettingsHandler.SetRemediationEngineForOrg(orgID, nil)
			log.Info().Msg("AI Intelligence: Remediation engine factory not registered")
		}
	} else {
		log.Info().Msg("AI Intelligence: Remediation engine skipped (requires Pulse Pro)")
	}

	// 8. Initialize unified alert/finding system and bridge.
	if monitor != nil {
		if alertManager := monitor.GetAlertManager(); alertManager != nil {
			// Create unified store.
			unifiedStore := unified.NewUnifiedStore(unified.DefaultAlertToFindingConfig())
			r.aiSettingsHandler.SetUnifiedStoreForOrg(orgID, unifiedStore)

			// Create alert bridge.
			alertBridge := unified.NewAlertBridge(unifiedStore, unified.DefaultBridgeConfig())

			// Create and set alert provider adapter.
			alertAdapter := unified.NewAlertManagerAdapter(alertManager)
			alertBridge.SetAlertProvider(alertAdapter)

			// Set patrol trigger function (triggers mini-patrol on alert events).
			patrol := aiService.GetPatrolService()
			if patrol != nil {
				alertBridge.SetPatrolTrigger(func(resourceID, resourceType, reason, alertType string) {
					scope := ai.PatrolScope{
						ResourceIDs:   []string{resourceID},
						ResourceTypes: []string{resourceType},
						Depth:         ai.PatrolDepthQuick,
						Context:       "Alert bridge: " + reason,
						Priority:      50,
					}
					switch reason {
					case "alert_fired":
						scope.Reason = ai.TriggerReasonAlertFired
						scope.Priority = 80
						if alertType != "" {
							scope.Context = "Alert: " + alertType
						}
					case "alert_cleared":
						scope.Reason = ai.TriggerReasonAlertCleared
						scope.Priority = 40
						if alertType != "" {
							scope.Context = "Alert cleared: " + alertType
						}
					default:
						scope.Reason = ai.TriggerReasonManual
					}

					log.Debug().
						Str("resource_id", resourceID).
						Str("reason", reason).
						Msg("Alert bridge: Triggering mini-patrol")
					if triggerManager := r.aiSettingsHandler.GetTriggerManagerForOrg(orgID); triggerManager != nil {
						if triggerManager.TriggerPatrol(scope) {
							log.Debug().
								Str("resource_id", resourceID).
								Str("reason", reason).
								Msg("Alert bridge: Queued patrol via trigger manager")
						} else {
							log.Warn().
								Str("resource_id", resourceID).
								Str("reason", reason).
								Msg("Alert bridge: Patrol trigger rejected by trigger manager")
						}
						return
					}
					orgCtx := context.WithValue(context.Background(), OrgIDContextKey, orgID)
					patrol.TriggerScopedPatrol(orgCtx, scope)
				})
			}

			// Start the bridge.
			alertBridge.Start()
			r.aiSettingsHandler.SetAlertBridgeForOrg(orgID, alertBridge)
			log.Info().Msg("AI Intelligence: Unified alert/finding bridge initialized and started")
		}
	}

	// 9. Wire up AI intelligence providers to patrol service for context injection.
	patrol := aiService.GetPatrolService()
	if patrol != nil {
		// Wire learning store for user preference context.
		if learningStore != nil {
			patrol.SetLearningProvider(learningStore)
		}

		// Wire Proxmox correlator for operations context.
		if proxmoxCorrelator != nil {
			patrol.SetProxmoxEventProvider(proxmoxCorrelator)
		}

		// Wire forecast service for trend predictions.
		if forecastService != nil {
			patrol.SetForecastProvider(forecastService)
		}

		// Wire remediation engine for auto-generating fix plans from findings.
		if remediationEngine != nil {
			patrol.SetRemediationEngine(remediationEngine)
		}

		// Wire guest prober for pre-patrol reachability checks via Unified Agents.
		if r.agentExecServer != nil {
			patrol.SetGuestProber(ai.NewAgentExecProber(r.agentExecServer))
		}

		// NOTE: Unified finding callback is wired in StartPatrol after findings persistence is loaded.

		log.Info().Msg("AI Intelligence: Patrol context providers wired up")
	}

	// 10. Initialize event-driven patrol trigger manager (Phase 7).
	if patrol != nil {
		triggerManager := ai.NewTriggerManager(ai.DefaultTriggerManagerConfig())

		// Set the patrol executor callback.
		triggerManager.SetOnTrigger(func(ctx context.Context, scope ai.PatrolScope) {
			patrol.TriggerScopedPatrol(ctx, scope)
		})

		// Start the trigger manager.
		triggerManager.Start(ctx)

		// Wire to patrol service.
		patrol.SetTriggerManager(triggerManager)

		// Store reference for shutdown and alert callbacks.
		r.aiSettingsHandler.SetTriggerManagerForOrg(orgID, triggerManager)

		// 11. Wire baseline anomaly callback to TriggerManager.
		if baselineStore := patrol.GetBaselineStore(); baselineStore != nil {
			baselineStore.SetAnomalyCallback(func(resourceID, resourceType, metric string, severity baseline.AnomalySeverity, value, baselineValue float64) {
				// Only trigger for significant anomalies (high or critical).
				if severity == baseline.AnomalyHigh || severity == baseline.AnomalyCritical {
					scope := ai.AnomalyTriggeredPatrolScope(
						resourceID,
						resourceType,
						metric,
						string(severity),
					)
					if triggerManager.TriggerPatrol(scope) {
						log.Debug().
							Str("resourceID", resourceID).
							Str("metric", metric).
							Str("severity", string(severity)).
							Msg("Anomaly triggered mini-patrol via TriggerManager")
					}
				}
			})
			log.Info().Msg("AI Intelligence: Baseline anomaly callback wired to trigger manager")
		}

		log.Info().Msg("AI Intelligence: Event-driven trigger manager initialized and started")
	}

	// 12. Initialize incident coordinator for high-frequency recording.
	if patrol != nil {
		incidentCoordinator := ai.NewIncidentCoordinator(ai.DefaultIncidentCoordinatorConfig())

		// Wire the incident store if available.
		if incidentStore := patrol.GetIncidentStore(); incidentStore != nil {
			incidentCoordinator.SetIncidentStore(incidentStore)
		}

		// Create metrics adapter for incident recorder (ReadState is sole source since SRC-03m).
		var metricsAdapter *adapters.MetricsAdapter
		if monitor != nil {
			metricsAdapter = adapters.NewMetricsAdapter(monitor.GetUnifiedReadState())
		}

		// Initialize and wire the incident recorder (high-frequency metrics).
		if metricsAdapter != nil {
			recorderCfg := metrics.DefaultIncidentRecorderConfig()
			recorderCfg.DataDir = dataDir
			recorder := metrics.NewIncidentRecorder(recorderCfg)
			recorder.SetMetricsProvider(metricsAdapter)
			recorder.Start()
			incidentCoordinator.SetRecorder(recorder)
			r.aiSettingsHandler.SetIncidentRecorderForOrg(orgID, recorder)
			log.Info().Msg("AI Intelligence: Incident recorder initialized and started")
		}

		// Start the coordinator.
		incidentCoordinator.Start()

		// Store reference.
		r.aiSettingsHandler.SetIncidentCoordinatorForOrg(orgID, incidentCoordinator)

		log.Info().Msg("AI Intelligence: Incident coordinator initialized and started")
	}

	log.Info().Msg("AI Intelligence: All Phase 6 & 7 services initialized successfully")
}

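// Illustrative manual trigger (a sketch using only names that appear in this
// file; the field values are arbitrary examples):
//
//	scope := ai.PatrolScope{
//		ResourceIDs: []string{"qemu/100"},
//		Depth:       ai.PatrolDepthQuick,
//		Reason:      ai.TriggerReasonManual,
//		Priority:    50,
//	}
//	if tm := r.aiSettingsHandler.GetTriggerManagerForOrg("default"); tm != nil {
//		queued := tm.TriggerPatrol(scope) // false when the manager rejects the trigger
//		_ = queued
//	}
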
// StopPatrol stops the AI patrol service.
func (r *Router) StopPatrol() {
	if r.aiSettingsHandler != nil {
		r.aiSettingsHandler.StopPatrol()
	}
	r.patrolLifecycleMu.Lock()
	r.startedPatrolOrgs = make(map[string]bool)
	r.patrolLifecycleMu.Unlock()
}

// StopPatrolForOrg stops patrol for a single org and clears its lifecycle marker.
func (r *Router) StopPatrolForOrg(orgID string) {
	orgID = normalizePatrolOrgID(orgID)
	if r.aiSettingsHandler != nil {
		r.aiSettingsHandler.StopPatrolForOrg(orgID)
	}
	r.clearPatrolStarted(orgID)
}

// ShutdownAIIntelligence gracefully shuts down all AI intelligence services (Phase 6).
// This should be called during application shutdown to ensure proper cleanup.
func (r *Router) ShutdownAIIntelligence() {
	r.shutdownBackgroundWorkers()

	if r.aiSettingsHandler == nil {
		return
	}

	log.Info().Msg("AI Intelligence: Starting graceful shutdown")

	// 1. Stop alert bridges (stop listening for alert events).
	for orgID, alertBridge := range r.aiSettingsHandler.ListAlertBridges() {
		if alertBridge == nil {
			continue
		}
		alertBridge.Stop()
		log.Debug().Str("org_id", orgID).Msg("AI Intelligence: Alert bridge stopped")
	}

	// 2. Stop patrol service for all tenants (waits for in-flight investigations, force-saves state).
	// Use StopPatrol() which stops patrol for both legacy and all tenant services.
	r.aiSettingsHandler.StopPatrol()
	log.Debug().Msg("AI Intelligence: All patrol services stopped")

	// 3. Stop trigger managers (stop event-driven patrol scheduling).
	for orgID, triggerManager := range r.aiSettingsHandler.ListTriggerManagers() {
		if triggerManager == nil {
			continue
		}
		triggerManager.Stop()
		log.Debug().Str("org_id", orgID).Msg("AI Intelligence: Trigger manager stopped")
	}

	// 4. Stop incident coordinators (stop high-frequency recording).
	for orgID, incidentCoordinator := range r.aiSettingsHandler.ListIncidentCoordinators() {
		if incidentCoordinator == nil {
			continue
		}
		incidentCoordinator.Stop()
		log.Debug().Str("org_id", orgID).Msg("AI Intelligence: Incident coordinator stopped")
	}

	// 4b. Stop incident recorders (stop background sampling).
	for orgID, incidentRecorder := range r.aiSettingsHandler.ListIncidentRecorders() {
		if incidentRecorder == nil {
			continue
		}
		incidentRecorder.Stop()
		log.Debug().Str("org_id", orgID).Msg("AI Intelligence: Incident recorder stopped")
	}

	// 5. Cleanup learning stores (removes old records, persists if data dir configured).
	for orgID, learningStore := range r.aiSettingsHandler.ListLearningStores() {
		if learningStore == nil {
			continue
		}
		learningStore.Cleanup()
		log.Debug().Str("org_id", orgID).Msg("AI Intelligence: Learning store cleaned up")
	}

	log.Info().Msg("AI Intelligence: Graceful shutdown complete")
}

func (r *Router) shutdownBackgroundWorkers() {
	if r.lifecycleCancel != nil {
		r.lifecycleCancel()
	}
	if r.sessionStore != nil {
		r.sessionStore.Shutdown()
	}
	if r.csrfStore != nil {
		r.csrfStore.Shutdown()
	}
	if r.recoveryTokenStore != nil {
		r.recoveryTokenStore.Shutdown()
	}
	if r.trueNASPoller != nil {
		r.trueNASPoller.Stop()
	}
	if r.vmwarePoller != nil {
		r.vmwarePoller.Stop()
	}
	if r.deployStore != nil {
		if err := r.deployStore.Close(); err != nil {
			log.Error().Err(err).Msg("Failed to close deploy store")
		}
	}
}

// StartAIChat starts the AI chat service.
// This is the new AI backend that supports tool calling and multi-model support.
func (r *Router) StartAIChat(ctx context.Context) {
	if r.aiHandler == nil {
		return
	}
	if r.monitor == nil {
		log.Warn().Msg("Cannot start AI chat: monitor not available")
		return
	}

	if err := r.aiHandler.Start(ctx, r.monitor); err != nil {
		log.Error().Err(err).Msg("Failed to start AI chat service")
		return
	}

	// Ensure default-org chat service has org-scoped dependencies wired.
	defaultOrgCtx := context.WithValue(context.Background(), OrgIDContextKey, "default")
	if service := r.aiHandler.GetService(defaultOrgCtx); service != nil {
		r.wireAIChatDependenciesForService(defaultOrgCtx, service)
	}

	// Wire up investigation orchestrator now that chat service is ready.
	// This must happen after Start() because the orchestrator needs the chat service.
	if r.aiSettingsHandler != nil {
		r.aiSettingsHandler.WireOrchestratorAfterChatStart()
	}

	// Wire circuit breaker for patrol if AI is running.
	if r.aiHandler != nil && r.aiHandler.IsRunning(defaultOrgCtx) {
		if r.aiSettingsHandler != nil {
			// Guard against a nil AI service before asking it for the patrol service.
			if aiSvc := r.aiSettingsHandler.GetAIService(defaultOrgCtx); aiSvc != nil {
				if patrolSvc := aiSvc.GetPatrolService(); patrolSvc != nil {
					// Wire circuit breaker for resilient AI API calls.
					if breaker := r.aiSettingsHandler.GetCircuitBreakerForOrg("default"); breaker != nil {
						patrolSvc.SetCircuitBreaker(breaker)
						log.Info().Msg("AI patrol circuit breaker wired")
					}
				}
			}
		}
	}
}

func (r *Router) persistenceForOrg(ctx context.Context) *config.ConfigPersistence {
	if r == nil {
		return nil
	}

	orgID := strings.TrimSpace(GetOrgID(ctx))
	if orgID == "" || orgID == "default" {
		return r.persistence
	}

	if r.multiTenant != nil {
		if p, err := r.multiTenant.GetPersistence(orgID); err == nil {
			return p
		}
	}

	return nil
}

// wireAIChatDependenciesForService wires org-scoped MCP tool providers and chat-service
// integration for a specific chat service instance.
func (r *Router) wireAIChatDependenciesForService(ctx context.Context, service AIService) {
	if r == nil || service == nil {
		return
	}

	if ctx == nil {
		ctx = context.Background()
	}
	orgID := strings.TrimSpace(GetOrgID(ctx))
	if orgID == "" {
		orgID = "default"
		ctx = context.WithValue(context.Background(), OrgIDContextKey, orgID)
	}

	monitor := r.getTenantMonitor(ctx)
	aiService := (*ai.Service)(nil)
	if r.aiSettingsHandler != nil {
		aiService = r.aiSettingsHandler.GetAIService(ctx)
		if aiService != nil && orgID != "default" && aiService.GetOrgID() != orgID {
			log.Warn().
				Str("org_id", orgID).
				Str("service_org_id", aiService.GetOrgID()).
				Msg("AI chat dependency wiring skipped: AI service org scope mismatch")
			aiService = nil
		}
	}

	chatService, ok := service.(*chat.Service)
	if !ok {
		log.Warn().Msg("Chat service is not *chat.Service, cannot create patrol adapter")
	} else if aiService != nil {
		aiService.SetChatService(&chatServiceAdapter{svc: chatService})

		// Wire mid-run budget enforcement from AI service to chat service.
		chatService.SetBudgetChecker(func() error {
			return aiService.CheckBudget("patrol")
		})

		log.Info().Str("org_id", orgID).Msg("Chat service wired to AI service for patrol and investigation")
	}

	// Wire alert provider.
	if monitor != nil {
		if alertManager := monitor.GetAlertManager(); alertManager != nil {
			alertAdapter := tools.NewAlertManagerMCPAdapter(alertManager)
			if alertAdapter != nil {
				service.SetAlertProvider(alertAdapter)
				log.Debug().Msg("AI chat: Alert provider wired")
			}
		}
	}

	// Wire findings provider from patrol service.
	if aiService != nil {
		if patrolSvc := aiService.GetPatrolService(); patrolSvc != nil {
			if r.aiSettingsHandler != nil {
				if breaker := r.aiSettingsHandler.GetCircuitBreakerForOrg(orgID); breaker != nil {
					patrolSvc.SetCircuitBreaker(breaker)
				}
			}
			if findingsStore := patrolSvc.GetFindings(); findingsStore != nil {
				findingsAdapter := ai.NewFindingsMCPAdapter(findingsStore)
				if findingsAdapter != nil {
					service.SetFindingsProvider(findingsAdapter)
					log.Debug().Msg("AI chat: Findings provider wired")
				}
			}
		}
	}

	if persistence := r.persistenceForOrg(ctx); persistence != nil {
		var licenseSvc licenseFeatureChecker
		if r.licenseHandlers != nil {
			licenseSvc = r.licenseHandlers.Service(ctx)
		}
		manager := NewMCPAgentProfileManager(persistence, licenseSvc)
		service.SetAgentProfileManager(manager)
		log.Debug().Msg("AI chat: Agent profile manager wired")
	}

	// Wire guest config provider (storage provider wiring removed).
	if monitor != nil {
		guestConfigAdapter := tools.NewGuestConfigMCPAdapter(monitor)
		if guestConfigAdapter != nil {
			service.SetGuestConfigProvider(guestConfigAdapter)
			log.Debug().Msg("AI chat: Guest config provider wired")
		}
	}

	// Wire backup provider.
	if monitor != nil {
		m := monitor
		backupAdapter := tools.NewBackupMCPAdapter(
			func() models.Backups { return m.BackupsSnapshot() },
			func() []models.PBSInstance { return m.PBSInstancesSnapshot() },
		)
		if backupAdapter != nil {
			service.SetBackupProvider(backupAdapter)
			log.Debug().Msg("AI chat: Backup provider wired")
		}
	}

	// Wire disk health provider.
	if monitor != nil {
		diskHealthAdapter := tools.NewDiskHealthMCPAdapter(monitor.GetUnifiedReadState())
		if diskHealthAdapter != nil {
			service.SetDiskHealthProvider(diskHealthAdapter)
			log.Debug().Msg("AI chat: Disk health provider wired")
		}
	}

	// Wire updates provider for Docker container updates.
	if monitor != nil {
		cfg := r.config
		if monitorCfg := monitor.GetConfig(); monitorCfg != nil {
			cfg = monitorCfg
		}
		updatesAdapter := tools.NewUpdatesMCPAdapter(
			monitor.GetUnifiedReadState(),
			monitor,
			&updatesConfigWrapper{cfg: cfg},
		)
		if updatesAdapter != nil {
			service.SetUpdatesProvider(updatesAdapter)
			log.Debug().Msg("AI chat: Updates provider wired")
		}
	}

	// Wire metrics history provider.
	if monitor != nil {
		if metricsHistory := monitor.GetMetricsHistory(); metricsHistory != nil {
			metricsAdapter := tools.NewMetricsHistoryMCPAdapter(
				&metricsSourceWrapper{history: metricsHistory},
				monitor.GetUnifiedReadState(),
			)
			if metricsAdapter != nil {
				service.SetMetricsHistory(metricsAdapter)
				log.Debug().Msg("AI chat: Metrics history provider wired")
			}
		}
	}

	// Wire baseline provider.
	if aiService != nil {
		if patrolSvc := aiService.GetPatrolService(); patrolSvc != nil {
			if baselineStore := patrolSvc.GetBaselineStore(); baselineStore != nil {
				baselineAdapter := tools.NewBaselineMCPAdapter(&baselineSourceWrapper{store: baselineStore})
				if baselineAdapter != nil {
					service.SetBaselineProvider(baselineAdapter)
					log.Debug().Msg("AI chat: Baseline provider wired")
				}
			}
		}
	}

	// Wire pattern provider. The adapter needs ReadState, so also require a
	// monitor here, matching every other monitor-backed wiring in this function.
	if aiService != nil && monitor != nil {
		if patrolSvc := aiService.GetPatrolService(); patrolSvc != nil {
			if patternDetector := patrolSvc.GetPatternDetector(); patternDetector != nil {
				patternAdapter := tools.NewPatternMCPAdapter(
					&patternSourceWrapper{detector: patternDetector},
					monitor.GetUnifiedReadState(),
				)
				if patternAdapter != nil {
					service.SetPatternProvider(patternAdapter)
					log.Debug().Msg("AI chat: Pattern provider wired")
				}
			}
		}
	}

	// Wire findings manager.
	if aiService != nil {
		if patrolSvc := aiService.GetPatrolService(); patrolSvc != nil {
			findingsManagerAdapter := tools.NewFindingsManagerMCPAdapter(patrolSvc)
			if findingsManagerAdapter != nil {
				service.SetFindingsManager(findingsManagerAdapter)
				log.Debug().Msg("AI chat: Findings manager wired")
			}
		}
	}

	// Wire metadata updater.
	if aiService != nil {
		metadataAdapter := tools.NewMetadataUpdaterMCPAdapter(aiService)
		if metadataAdapter != nil {
			service.SetMetadataUpdater(metadataAdapter)
			log.Debug().Msg("AI chat: Metadata updater wired")
		}
	}

	// Wire intelligence providers for MCP tools:
	// - IncidentRecorderProvider: high-frequency incident data (pulse_get_incident_window)
	// - EventCorrelatorProvider: Proxmox events (pulse_correlate_events)
	// - KnowledgeStoreProvider: notes (pulse_remember, pulse_recall)

	// Wire incident recorder provider (high-frequency incident data).
	if r.aiSettingsHandler != nil {
		if recorder := r.aiSettingsHandler.GetIncidentRecorderForOrg(orgID); recorder != nil {
			service.SetIncidentRecorderProvider(&incidentRecorderProviderWrapper{recorder: recorder})
			log.Debug().Msg("AI chat: Incident recorder provider wired")
		}
	}

	// Wire event correlator provider (Proxmox events).
	if r.aiSettingsHandler != nil {
		if correlator := r.aiSettingsHandler.GetProxmoxCorrelatorForOrg(orgID); correlator != nil {
			service.SetEventCorrelatorProvider(&eventCorrelatorProviderWrapper{correlator: correlator})
			log.Debug().Msg("AI chat: Event correlator provider wired")
		}
	}

	// Wire knowledge store provider for notes (pulse_remember, pulse_recall).
	if aiService != nil {
		if patrolSvc := aiService.GetPatrolService(); patrolSvc != nil {
			if knowledgeStore := patrolSvc.GetKnowledgeStore(); knowledgeStore != nil {
				service.SetKnowledgeStoreProvider(&knowledgeStoreProviderWrapper{store: knowledgeStore})
				log.Debug().Msg("AI chat: Knowledge store provider wired")
			}
		}
	}

	// Wire discovery provider for AI-powered infrastructure discovery (pulse_get_discovery, pulse_list_discoveries).
	if aiService != nil {
		if discoverySvc := aiService.GetDiscoveryService(); discoverySvc != nil {
			adapter := servicediscovery.NewToolsAdapter(discoverySvc)
			if adapter != nil {
				service.SetDiscoveryProvider(tools.NewDiscoveryMCPAdapter(adapter))
				log.Debug().Msg("AI chat: Discovery provider wired")
			}
		}
	}

	// Wire unified resource provider for physical disks, Ceph, etc.
	if monitor != nil {
		if provider := r.unifiedResourceProviderForMonitor(monitor); provider != nil {
			service.SetUnifiedResourceProvider(provider)
			log.Debug().Msg("AI chat: Unified resource provider wired")
		}
	} else if orgID == "default" {
		if provider := r.defaultUnifiedResourceProvider(); provider != nil {
			service.SetUnifiedResourceProvider(provider)
			log.Debug().Msg("AI chat: Unified resource provider wired")
		}
	}
	if provider := newTrueNASAppActionProvider(r.trueNASPoller); provider != nil {
		service.SetAppContainerActionProvider(provider)
		log.Debug().Msg("AI chat: App-container action provider wired")
	}
	if provider := newTrueNASAppReadProvider(r.trueNASPoller); provider != nil {
		service.SetAppContainerReadProvider(provider)
		log.Debug().Msg("AI chat: App-container read provider wired")
	}
	if provider := newTrueNASAppConfigProvider(r.trueNASPoller); provider != nil {
		service.SetAppContainerConfigProvider(provider)
		log.Debug().Msg("AI chat: App-container config provider wired")
	}

	log.Info().Str("org_id", orgID).Msg("AI chat MCP tool providers wired")
}

// forecastResourceIterator wraps ReadState to implement forecast.ResourceIterator.
// Converts typed view accessors (ReadState) to forecast.ResourceInfo slices.
type forecastResourceIterator struct {
	readState unifiedresources.ReadState
}

func (w *forecastResourceIterator) ForecastVMs() []forecast.ResourceInfo {
	if w.readState == nil {
		return nil
	}
	vms := w.readState.VMs()
	result := make([]forecast.ResourceInfo, 0, len(vms))
	for _, vm := range vms {
		result = append(result, forecast.ResourceInfo{ID: vm.SourceID(), Name: vm.Name()})
	}
	return result
}

func (w *forecastResourceIterator) ForecastContainers() []forecast.ResourceInfo {
	if w.readState == nil {
		return nil
	}
	cts := w.readState.Containers()
	result := make([]forecast.ResourceInfo, 0, len(cts))
	for _, ct := range cts {
		result = append(result, forecast.ResourceInfo{ID: ct.SourceID(), Name: ct.Name()})
	}
	return result
}

func (w *forecastResourceIterator) ForecastNodes() []forecast.ResourceInfo {
	if w.readState == nil {
		return nil
	}
	nodes := w.readState.Nodes()
	result := make([]forecast.ResourceInfo, 0, len(nodes))
	for _, node := range nodes {
		result = append(result, forecast.ResourceInfo{ID: node.SourceID(), Name: node.Name()})
	}
	return result
}

func (w *forecastResourceIterator) ForecastStoragePools() []forecast.ResourceInfo {
	if w.readState == nil {
		return nil
	}
	pools := w.readState.StoragePools()
	result := make([]forecast.ResourceInfo, 0, len(pools))
	for _, sp := range pools {
		result = append(result, forecast.ResourceInfo{ID: sp.SourceID(), Name: sp.Name()})
	}
	return result
}

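// Illustrative use (a sketch; the IDs follow the legacy source-ID convention
// noted in learnBaselines below):
//
//	it := &forecastResourceIterator{readState: monitor.GetUnifiedReadState()}
//	for _, info := range it.ForecastVMs() {
//		_ = info.ID // e.g. "qemu/100"
//	}
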
// incidentRecorderProviderWrapper adapts metrics.IncidentRecorder to tools.IncidentRecorderProvider.
type incidentRecorderProviderWrapper struct {
	recorder *metrics.IncidentRecorder
}

func (w *incidentRecorderProviderWrapper) GetWindowsForResource(resourceID string, limit int) []*tools.IncidentWindow {
	if w.recorder == nil {
		return nil
	}

	windows := w.recorder.GetWindowsForResource(resourceID, limit)
	if len(windows) == 0 {
		return nil
	}

	result := make([]*tools.IncidentWindow, 0, len(windows))
	for _, window := range windows {
		if window == nil {
			continue
		}
		result = append(result, convertIncidentWindow(window))
	}
	return result
}

func (w *incidentRecorderProviderWrapper) GetWindow(windowID string) *tools.IncidentWindow {
	if w.recorder == nil {
		return nil
	}
	window := w.recorder.GetWindow(windowID)
	if window == nil {
		return nil
	}
	return convertIncidentWindow(window)
}

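// Illustrative use (a sketch; the recorder comes from
// aiSettingsHandler.GetIncidentRecorderForOrg as wired above):
//
//	w := &incidentRecorderProviderWrapper{recorder: recorder}
//	for _, win := range w.GetWindowsForResource("qemu/100", 5) {
//		_ = win.Summary // may be nil; see convertIncidentWindow below
//	}
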
func convertIncidentWindow(window *metrics.IncidentWindow) *tools.IncidentWindow {
	if window == nil {
		return nil
	}

	points := make([]tools.IncidentDataPoint, 0, len(window.DataPoints))
	for _, point := range window.DataPoints {
		points = append(points, tools.IncidentDataPoint{
			Timestamp: point.Timestamp,
			Metrics:   point.Metrics,
		})
	}

	var summary *tools.IncidentSummary
	if window.Summary != nil {
		summary = &tools.IncidentSummary{
			Duration:   window.Summary.Duration,
			DataPoints: window.Summary.DataPoints,
			Peaks:      window.Summary.Peaks,
			Lows:       window.Summary.Lows,
			Averages:   window.Summary.Averages,
			Changes:    window.Summary.Changes,
		}
	}

	return &tools.IncidentWindow{
		ID:           window.ID,
		ResourceID:   window.ResourceID,
		ResourceName: window.ResourceName,
		ResourceType: window.ResourceType,
		TriggerType:  window.TriggerType,
		TriggerID:    window.TriggerID,
		StartTime:    window.StartTime,
		EndTime:      window.EndTime,
		Status:       string(window.Status),
		DataPoints:   points,
		Summary:      summary,
	}
}

// eventCorrelatorProviderWrapper adapts proxmox.EventCorrelator to tools.EventCorrelatorProvider.
type eventCorrelatorProviderWrapper struct {
	correlator *proxmox.EventCorrelator
}

func (w *eventCorrelatorProviderWrapper) GetCorrelationsForResource(resourceID string, window time.Duration) []tools.EventCorrelation {
	if w.correlator == nil {
		return nil
	}

	// NOTE: the window parameter is accepted for interface compatibility but is
	// not applied here; the correlator returns whatever it has retained for the
	// resource.
	correlations := w.correlator.GetCorrelationsForResource(resourceID)
	if len(correlations) == 0 {
		return nil
	}

	result := make([]tools.EventCorrelation, 0, len(correlations))
	for _, corr := range correlations {
		result = append(result, tools.EventCorrelation{
			EventType:    string(corr.Event.Type),
			Timestamp:    corr.Event.Timestamp,
			ResourceID:   corr.Event.ResourceID,
			ResourceName: corr.Event.ResourceName,
			Description:  corr.Explanation,
			Metadata: map[string]interface{}{
				"confidence": corr.Confidence,
				"anomalies":  len(corr.Anomalies),
				"event_id":   corr.Event.ID,
			},
		})
	}
	return result
}

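// Illustrative use (a sketch):
//
//	w := &eventCorrelatorProviderWrapper{correlator: correlator}
//	for _, c := range w.GetCorrelationsForResource("qemu/100", 10*time.Minute) {
//		_ = c.Metadata["confidence"]
//	}
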
// metricsSourceWrapper wraps monitoring.MetricsHistory to implement tools.MetricsSource.
type metricsSourceWrapper struct {
	history *monitoring.MetricsHistory
}

func (w *metricsSourceWrapper) GetGuestMetrics(guestID string, metricType string, duration time.Duration) []tools.RawMetricPoint {
	points := w.history.GetGuestMetrics(guestID, metricType, duration)
	return convertMetricPoints(points)
}

func (w *metricsSourceWrapper) GetNodeMetrics(nodeID string, metricType string, duration time.Duration) []tools.RawMetricPoint {
	points := w.history.GetNodeMetrics(nodeID, metricType, duration)
	return convertMetricPoints(points)
}

func (w *metricsSourceWrapper) GetAllGuestMetrics(guestID string, duration time.Duration) map[string][]tools.RawMetricPoint {
	metricsMap := w.history.GetAllGuestMetrics(guestID, duration)
	result := make(map[string][]tools.RawMetricPoint, len(metricsMap))
	for key, points := range metricsMap {
		result[key] = convertMetricPoints(points)
	}
	return result
}

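// Illustrative use (a sketch; metric names follow the "cpu"/"memory"/"disk"
// convention used by learnBaselines below):
//
//	src := &metricsSourceWrapper{history: monitor.GetMetricsHistory()}
//	points := src.GetGuestMetrics("qemu/100", "cpu", time.Hour)
//	_ = points
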
func convertMetricPoints(points []monitoring.MetricPoint) []tools.RawMetricPoint {
	result := make([]tools.RawMetricPoint, len(points))
	for i, p := range points {
		result[i] = tools.RawMetricPoint{
			Value:     p.Value,
			Timestamp: p.Timestamp,
		}
	}
	return result
}

// baselineSourceWrapper wraps ai.BaselineStore to implement tools.BaselineSource.
type baselineSourceWrapper struct {
	store *ai.BaselineStore
}

func (w *baselineSourceWrapper) GetBaseline(resourceID, metric string) (mean, stddev float64, sampleCount int, ok bool) {
	if w.store == nil {
		return 0, 0, 0, false
	}
	// Named "bl" rather than "baseline" to avoid shadowing the imported baseline package.
	bl, found := w.store.GetBaseline(resourceID, metric)
	if !found || bl == nil {
		return 0, 0, 0, false
	}
	return bl.Mean, bl.StdDev, bl.SampleCount, true
}

func (w *baselineSourceWrapper) GetAllBaselines() map[string]map[string]tools.BaselineData {
	if w.store == nil {
		return nil
	}
	allFlat := w.store.GetAllBaselines()
	if allFlat == nil {
		return nil
	}

	result := make(map[string]map[string]tools.BaselineData)
	for key, flat := range allFlat {
		// Key format is "resourceID:metric", e.g. "qemu/100:cpu".
		parts := strings.SplitN(key, ":", 2)
		if len(parts) != 2 {
			continue
		}
		resourceID, metric := parts[0], parts[1]

		if result[resourceID] == nil {
			result[resourceID] = make(map[string]tools.BaselineData)
		}
		result[resourceID][metric] = tools.BaselineData{
			Mean:        flat.Mean,
			StdDev:      flat.StdDev,
			SampleCount: flat.Samples,
		}
	}
	return result
}

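// Illustrative shape of GetAllBaselines (a sketch; the "resourceID:metric" key
// split is shown in the method above):
//
//	all := w.GetAllBaselines()
//	cpu := all["qemu/100"]["cpu"]
//	_ = cpu.Mean // plus StdDev and SampleCount
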
// patternSourceWrapper wraps ai.PatternDetector to implement tools.PatternSource.
type patternSourceWrapper struct {
	detector *ai.PatternDetector
}

func (w *patternSourceWrapper) GetPatterns() []tools.PatternData {
	if w.detector == nil {
		return nil
	}

	patterns := w.detector.GetPatterns()
	if patterns == nil {
		return nil
	}

	result := make([]tools.PatternData, 0, len(patterns))
	for _, p := range patterns {
		if p == nil {
			continue
		}
		result = append(result, tools.PatternData{
			ResourceID:  p.ResourceID,
			PatternType: string(p.EventType),
			Description: fmt.Sprintf("%s pattern with %d occurrences", p.EventType, p.Occurrences),
			Confidence:  p.Confidence,
			LastSeen:    p.LastOccurrence,
		})
	}
	return result
}

func (w *patternSourceWrapper) GetPredictions() []tools.PredictionData {
	if w.detector == nil {
		return nil
	}

	predictions := w.detector.GetPredictions()
	if predictions == nil {
		return nil
	}

	result := make([]tools.PredictionData, 0, len(predictions))
	for _, p := range predictions {
		result = append(result, tools.PredictionData{
			ResourceID:     p.ResourceID,
			IssueType:      string(p.EventType),
			PredictedTime:  p.PredictedAt,
			Confidence:     p.Confidence,
			Recommendation: p.Basis,
		})
	}
	return result
}

// updatesConfigWrapper wraps config.Config to implement tools.UpdatesConfig.
type updatesConfigWrapper struct {
	cfg *config.Config
}

func (w *updatesConfigWrapper) IsDockerUpdateActionsEnabled() bool {
	if w.cfg == nil {
		return true // Default to enabled.
	}
	return !w.cfg.DisableDockerUpdateActions
}

// StopAIChat stops the AI chat service.
func (r *Router) StopAIChat(ctx context.Context) {
	if r.aiHandler != nil {
		if err := r.aiHandler.Stop(ctx); err != nil {
			log.Error().Err(err).Msg("Failed to stop AI chat service")
		}
	}
}

// RestartAIChat restarts the AI chat service with updated configuration.
// Call this when AI settings change that affect the service (e.g., model selection).
func (r *Router) RestartAIChat(ctx context.Context) {
	if r.aiHandler != nil {
		if err := r.aiHandler.Restart(ctx); err != nil {
			log.Error().Err(err).Msg("Failed to restart AI chat service")
		} else {
			log.Info().Msg("AI chat service restarted with new configuration")
		}
	}
}

// StartRelay starts the relay client if configured and licensed.
func (r *Router) StartRelay(ctx context.Context) {
	cfg, err := r.loadRelayConfigForRuntime(ctx)
	if err != nil {
		log.Error().Err(err).Msg("Failed to load relay config")
		return
	}
	if !cfg.Enabled {
		log.Debug().Msg("Relay not enabled, skipping")
		return
	}

	// Check license.
	if r.licenseHandlers != nil {
		svc := r.licenseHandlers.Service(ctx)
		if svc != nil {
			if err := svc.RequireFeature(featureRelayKey); err != nil {
				log.Warn().Msg("Relay feature not licensed, skipping")
				return
			}
		}
	}

	localAddr := fmt.Sprintf("127.0.0.1:%d", r.config.FrontendPort)

	deps := relay.ClientDeps{
		LicenseTokenFunc: func() string {
			return r.relayRegistrationToken(context.Background())
		},
		TokenValidator: func(token string) bool {
			config.Mu.Lock()
			_, ok := r.config.ValidateAPIToken(token)
			config.Mu.Unlock()
			return ok
		},
		LocalAddr:          localAddr,
		ServerVersion:      r.serverVersion,
		IdentityPubKey:     cfg.IdentityPublicKey,
		IdentityPrivateKey: cfg.IdentityPrivateKey,
	}

	relayCtx, relayCancel := context.WithCancel(ctx)
	client := relay.NewClient(*cfg, deps, log.Logger)

	r.relayMu.Lock()
	r.relayClient = client
	r.relayCancel = relayCancel
	r.relayMu.Unlock()

	go func() {
		if err := client.Run(relayCtx); err != nil && relayCtx.Err() == nil {
			log.Error().Err(err).Msg("Relay client stopped unexpectedly")
		}
	}()

	log.Info().Str("server_url", cfg.ServerURL).Msg("Relay client started")
}

// StopRelay stops the relay client.
func (r *Router) StopRelay() {
	r.relayMu.Lock()
	cancel := r.relayCancel
	client := r.relayClient
	r.relayClient = nil
	r.relayCancel = nil
	r.relayMu.Unlock()

	if cancel != nil {
		cancel()
	}
	if client != nil {
		client.Close()
		log.Info().Msg("Relay client stopped")
	}
}

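// handleGetRelayConfig returns the relay configuration with secrets omitted.
// Illustrative response shape (field names from the struct below; the values
// are placeholders):
//
//	{"enabled":true,"server_url":"wss://relay.example.com",
//	 "identity_public_key":"...","identity_fingerprint":"..."}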
func (r *Router) handleGetRelayConfig(w http.ResponseWriter, req *http.Request) {
	cfg, err := r.loadRelayConfigForRuntime(req.Context())
	if err != nil {
		http.Error(w, "failed to load relay config", http.StatusInternalServerError)
		return
	}

	// Omit the instance secret and private key from the response.
	resp := struct {
		Enabled             bool   `json:"enabled"`
		ServerURL           string `json:"server_url"`
		IdentityPublicKey   string `json:"identity_public_key,omitempty"`
		IdentityFingerprint string `json:"identity_fingerprint,omitempty"`
	}{
		Enabled:             cfg.Enabled,
		ServerURL:           cfg.ServerURL,
		IdentityPublicKey:   cfg.IdentityPublicKey,
		IdentityFingerprint: cfg.IdentityFingerprint,
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(resp)
}

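// handleUpdateRelayConfig applies a partial update to the relay configuration.
// Illustrative request body (field names from the decode struct below; all
// fields are optional and the values are placeholders):
//
//	{"enabled":true,"server_url":"wss://relay.example.com"}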
func (r *Router) handleUpdateRelayConfig(w http.ResponseWriter, req *http.Request) {
	var update struct {
		Enabled        *bool   `json:"enabled"`
		ServerURL      *string `json:"server_url"`
		InstanceSecret *string `json:"instance_secret"`
	}
	if err := json.NewDecoder(req.Body).Decode(&update); err != nil {
		http.Error(w, "invalid request body", http.StatusBadRequest)
		return
	}

	prev, err := r.persistence.LoadRelayConfig()
	if err != nil {
		http.Error(w, "failed to load relay config", http.StatusInternalServerError)
		return
	}

	// Apply updates to a copy.
	cfg := *prev
	if update.Enabled != nil {
		cfg.Enabled = *update.Enabled
	}
	if update.ServerURL != nil && *update.ServerURL != "" {
		cfg.ServerURL = *update.ServerURL
	}
	if update.InstanceSecret != nil {
		cfg.InstanceSecret = *update.InstanceSecret
	}

	// Generate identity keypair on first enable.
	identityGenerated := false
	if cfg.Enabled && cfg.IdentityPrivateKey == "" {
		privKey, pubKey, fp, err := relay.GenerateIdentityKeyPair()
		if err != nil {
			log.Error().Err(err).Msg("Failed to generate relay identity keypair")
			http.Error(w, "failed to generate identity keypair", http.StatusInternalServerError)
			return
		}
		cfg.IdentityPrivateKey = privKey
		cfg.IdentityPublicKey = pubKey
		cfg.IdentityFingerprint = fp
		identityGenerated = true
		log.Info().Str("fingerprint", fp).Msg("Generated relay instance identity keypair")
	}

	if err := r.persistence.SaveRelayConfig(cfg); err != nil {
		http.Error(w, "failed to save relay config", http.StatusInternalServerError)
		return
	}

	// Restart relay client if any connection-relevant field changed.
	// Also restart when identity keypair was just generated so the running
	// client picks up the new IdentityPubKey.
	configChanged := cfg.Enabled != prev.Enabled ||
		cfg.ServerURL != prev.ServerURL ||
		cfg.InstanceSecret != prev.InstanceSecret ||
		identityGenerated
	if configChanged {
		r.StopRelay()
		if cfg.Enabled {
			// Use Background context — the relay client must outlive this HTTP request.
			r.StartRelay(context.Background())
		}
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
}

func (r *Router) handleGetRelayStatus(w http.ResponseWriter, req *http.Request) {
	r.relayMu.RLock()
	client := r.relayClient
	r.relayMu.RUnlock()

	w.Header().Set("Content-Type", "application/json")
	if client == nil {
		json.NewEncoder(w).Encode(relay.ClientStatus{})
		return
	}
	json.NewEncoder(w).Encode(client.Status())
}

// startBaselineLearning runs a background loop that learns baselines from metrics history.
// This enables anomaly detection by understanding what "normal" looks like for each resource.
func (r *Router) startBaselineLearning(ctx context.Context, store *ai.BaselineStore, metricsHistory *monitoring.MetricsHistory) {
	if store == nil || metricsHistory == nil {
		return
	}

	// Learn every hour.
	ticker := time.NewTicker(1 * time.Hour)
	defer ticker.Stop()

	// Run initial learning after a short delay (allow metrics to accumulate).
	initialDelay := time.NewTimer(5 * time.Minute)
	defer initialDelay.Stop()

	select {
	case <-ctx.Done():
		return
	case <-initialDelay.C:
		r.learnBaselines(store, metricsHistory)
	}

	log.Info().Msg("Baseline learning loop started")

	for {
		select {
		case <-ctx.Done():
			// Save baselines before exit.
			if err := store.Save(); err != nil {
				log.Warn().Err(err).Msg("Failed to save baselines on shutdown")
			}
			log.Info().Msg("Baseline learning loop stopped")
			return
		case <-ticker.C:
			r.learnBaselines(store, metricsHistory)
		}
	}
}

// learnBaselines updates baselines for all resources from metrics history.
// Uses ReadState typed views — the legacy GetState fallback was removed
// after ReadState became the sole state access mechanism (SRC-03e+).
func (r *Router) learnBaselines(store *ai.BaselineStore, metricsHistory *monitoring.MetricsHistory) {
	if r.monitor == nil {
		return
	}

	readState := r.monitor.GetUnifiedReadState()
	if readState == nil {
		return
	}

	learningWindow := 7 * 24 * time.Hour // Learn from 7 days of data.
	var learned int

	// Use SourceID() for all ID lookups — metrics history is keyed by legacy
	// source IDs (e.g. "node/pve", "qemu/100"), not unified registry IDs.

	for _, node := range readState.Nodes() {
		id := node.SourceID()
		if id == "" {
			continue
		}
		for _, metric := range []string{"cpu", "memory"} {
			points := metricsHistory.GetNodeMetrics(id, metric, learningWindow)
			if len(points) > 0 {
				baselinePoints := make([]ai.BaselineMetricPoint, len(points))
				for i, p := range points {
					baselinePoints[i] = ai.BaselineMetricPoint{Value: p.Value, Timestamp: p.Timestamp}
				}
				if err := store.Learn(id, "node", metric, baselinePoints); err == nil {
					learned++
				}
			}
		}
	}

	for _, vm := range readState.VMs() {
		if vm.Template() {
			continue
		}
		id := vm.SourceID()
		if id == "" {
			continue
		}
		for _, metric := range []string{"cpu", "memory", "disk"} {
			points := metricsHistory.GetGuestMetrics(id, metric, learningWindow)
			if len(points) > 0 {
				baselinePoints := make([]ai.BaselineMetricPoint, len(points))
				for i, p := range points {
					baselinePoints[i] = ai.BaselineMetricPoint{Value: p.Value, Timestamp: p.Timestamp}
				}
				if err := store.Learn(id, "vm", metric, baselinePoints); err == nil {
					learned++
				}
			}
		}
	}

	for _, ct := range readState.Containers() {
		if ct.Template() {
			continue
		}
		id := ct.SourceID()
		if id == "" {
			continue
		}
		for _, metric := range []string{"cpu", "memory", "disk"} {
			points := metricsHistory.GetGuestMetrics(id, metric, learningWindow)
			if len(points) > 0 {
				baselinePoints := make([]ai.BaselineMetricPoint, len(points))
				for i, p := range points {
					baselinePoints[i] = ai.BaselineMetricPoint{Value: p.Value, Timestamp: p.Timestamp}
				}
				if err := store.Learn(id, "container", metric, baselinePoints); err == nil {
					learned++
				}
			}
		}
	}

	// Save after learning.
	if err := store.Save(); err != nil {
		log.Warn().Err(err).Msg("Failed to save baselines")
	}

	log.Debug().
		Int("baselines_updated", learned).
		Int("resources", store.ResourceCount()).
		Msg("Baseline learning complete")
}

// GetAlertTriggeredAnalyzer returns the alert-triggered analyzer for wiring into the monitor's alert callback
|
||
// This enables AI to analyze specific resources when alerts fire, providing token-efficient real-time insights
|
||
// GetLicenseHandlers returns the license handlers for external callers (e.g. telemetry).
|
||
func (r *Router) GetLicenseHandlers() *LicenseHandlers {
|
||
return r.licenseHandlers
|
||
}
|
||
|
||
// StopGrantRefresh stops all grant refresh and revocation poll loops across all tenants.
|
||
func (r *Router) StopGrantRefresh() {
|
||
if r.licenseHandlers != nil {
|
||
r.licenseHandlers.StopAllBackgroundLoops()
|
||
}
|
||
}
|
||
|
||
// SetTelemetryToggleFunc wires a callback that is invoked when the user
|
||
// toggles telemetry on or off at runtime via system settings.
|
||
func (r *Router) SetTelemetryToggleFunc(fn func(enabled bool)) {
|
||
if r.systemSettingsHandler != nil {
|
||
r.systemSettingsHandler.SetTelemetryToggleFunc(fn)
|
||
}
|
||
}
|
||
|
||
// SetTelemetryPreviewFunc wires the exact runtime telemetry preview callback
|
||
// into the system settings handler.
|
||
func (r *Router) SetTelemetryPreviewFunc(fn func() (telemetry.Ping, error)) {
|
||
if r.systemSettingsHandler != nil {
|
||
r.systemSettingsHandler.SetTelemetryPreviewFunc(fn)
|
||
}
|
||
}
|
||
|
||
// SetTelemetryResetFunc wires the telemetry install-ID reset callback into the
|
||
// system settings handler.
|
||
func (r *Router) SetTelemetryResetFunc(fn func() (telemetry.Ping, error)) {
|
||
if r.systemSettingsHandler != nil {
|
||
r.systemSettingsHandler.SetTelemetryResetFunc(fn)
|
||
}
|
||
}
|
||
|
||
func (r *Router) GetAlertTriggeredAnalyzer() aicontracts.AlertAnalyzer {
|
||
if r.aiSettingsHandler != nil {
|
||
return r.aiSettingsHandler.GetAlertTriggeredAnalyzer(context.Background())
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// WireAlertTriggeredAI connects the alert-triggered AI analyzer to the monitor's alert callback
|
||
// This should be called after StartPatrol() to ensure the analyzer is initialized
|
||
// WireAlertTriggeredAI connects the alert-triggered AI analyzer to the monitor's alert callback
|
||
// This should be called after StartPatrol() to ensure the analyzer is initialized
|
||
func (r *Router) WireAlertTriggeredAI() {
|
||
// 1. Get the AI service (default tenant for now)
|
||
if r.aiSettingsHandler == nil {
|
||
log.Debug().Msg("AI settings handler not available for wiring")
|
||
return
|
||
}
|
||
aiService := r.aiSettingsHandler.GetAIService(context.Background())
|
||
if aiService == nil {
|
||
log.Debug().Msg("AI service not available for wiring")
|
||
return
|
||
}
|
||
|
||
// 2. Get the Monitor (The Trigger)
|
||
if r.monitor == nil {
|
||
log.Debug().Msg("Monitor not available for AI alert callback")
|
||
return
|
||
}
|
||
|
||
// 3. Connect alert-fired events to the dedicated alert-triggered analyzer.
|
||
// Patrol's event-triggered runs are owned by the canonical alert bridge /
|
||
// trigger-manager path, so this callback should not enqueue Patrol directly.
|
||
r.monitor.SetAlertTriggeredAICallback(func(alert *alerts.Alert) {
|
||
if analyzer := r.GetAlertTriggeredAnalyzer(); analyzer != nil {
|
||
log.Info().Str("alert_identifier", alert.ID).Msg("Alert fired leading to alert-triggered analysis")
|
||
analyzer.OnAlertFired(alert)
|
||
}
|
||
})
|
||
|
||
log.Info().Msg("Alert-triggered AI analyzer wired to monitor")
|
||
}
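
// Illustrative startup ordering for the wiring above (a hypothetical caller;
// the constructor name is an assumption, only the two Router methods are real):
//
//	router := NewRouter(...)      // assumed constructor
//	// ... StartPatrol() runs so the analyzer exists ...
//	router.WireAlertTriggeredAI() // connects monitor alerts to the analyzer
//
// The key constraint, per the doc comment above, is that WireAlertTriggeredAI
// runs after StartPatrol() so GetAlertTriggeredAnalyzer returns a non-nil analyzer.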

// deriveResourceTypeFromAlert derives the resource type from an alert.
//
// Deprecated: deriveResourceTypeFromAlert uses heuristic string matching.
// Use alert.Metadata["resourceType"] as the canonical source instead.
// This function is retained for test backward compatibility only.
// See: Appendix C of alerts-unified-resource-hardening-plan-2026-02.md.
func deriveResourceTypeFromAlert(alert *alerts.Alert) string {
	if alert == nil {
		return ""
	}

	// Prefer explicit canonical resource type from alert metadata.
	if alert.Metadata != nil {
		if raw, ok := alert.Metadata["resourceType"].(string); ok {
			switch canonicalAlertResourceTypeToken(raw) {
			case "vm":
				return "vm"
			case "system-container", "oci-container":
				return "system-container"
			case "app-container", "docker-host":
				return "app-container"
			case "node":
				return "node"
			case "storage", "disk":
				return "storage"
			case "pbs":
				return "pbs"
			case "k8s", "k8s-node", "k8s-cluster":
				return "k8s"
			}
		}
	}

	// Infer from resource ID patterns.
	resourceID := strings.ToLower(strings.TrimSpace(alert.ResourceID))
	switch {
	case strings.Contains(resourceID, "/node/"),
		strings.HasPrefix(resourceID, "node/"),
		strings.HasPrefix(resourceID, "node:"):
		return "node"
	case strings.Contains(resourceID, "/qemu/"),
		strings.HasPrefix(resourceID, "vm:"),
		strings.HasPrefix(resourceID, "vm/"):
		return "vm"
	case strings.Contains(resourceID, "/lxc/"),
		strings.HasPrefix(resourceID, "system-container:"),
		strings.HasPrefix(resourceID, "system-container/"),
		strings.HasPrefix(resourceID, "oci-container:"),
		strings.HasPrefix(resourceID, "oci-container/"):
		return "system-container"
	case strings.Contains(resourceID, "docker:"),
		strings.HasPrefix(resourceID, "app-container:"),
		strings.HasPrefix(resourceID, "app-container/"),
		strings.HasPrefix(resourceID, "docker-host:"),
		strings.HasPrefix(resourceID, "docker-host/"),
		strings.Contains(resourceID, "docker"):
		return "app-container"
	case strings.HasPrefix(resourceID, "storage/"), strings.Contains(resourceID, "storage"):
		return "storage"
	case strings.HasPrefix(resourceID, "pbs"), strings.Contains(resourceID, "/pbs/"):
		return "pbs"
	case strings.Contains(resourceID, "k8s"), strings.Contains(resourceID, "kubernetes"):
		return "k8s"
	}

	// Final fallback by alert type for broad non-workload classes.
	alertType := strings.ToLower(strings.TrimSpace(alert.Type))
	switch {
	case strings.HasPrefix(alertType, "node"):
		return "node"
	case strings.Contains(alertType, "storage"):
		return "storage"
	case strings.Contains(alertType, "pbs"):
		return "pbs"
	case strings.Contains(alertType, "kubernetes"), strings.Contains(alertType, "k8s"):
		return "k8s"
	default:
		return "vm"
	}
}
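
// Rough sketch of the heuristic above on sample inputs (illustrative only;
// the IDs are made up, not taken from real state):
//
//	deriveResourceTypeFromAlert(&alerts.Alert{ResourceID: "cluster/node/pve1"}) // "node"
//	deriveResourceTypeFromAlert(&alerts.Alert{ResourceID: "pve1/qemu/101"})     // "vm"
//	deriveResourceTypeFromAlert(&alerts.Alert{ResourceID: "pve1/lxc/105"})      // "system-container"
//	deriveResourceTypeFromAlert(&alerts.Alert{ResourceID: "docker:web-1"})      // "app-container"
//	deriveResourceTypeFromAlert(&alerts.Alert{Type: "storage-usage"})           // "storage" (type fallback)
//
// A canonical Metadata["resourceType"] value, when present, wins over all of these.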

func canonicalAlertResourceTypeToken(raw string) string {
	normalized := strings.ToLower(strings.TrimSpace(raw))
	if normalized == "" || unifiedresources.IsUnsupportedLegacyResourceTypeAlias(normalized) {
		return ""
	}
	switch normalized {
	case "vm", "system-container", "oci-container", "app-container", "node", "storage", "disk", "agent", "docker-host", "pbs", "pmg", "k8s", "k8s-node", "k8s-cluster":
		return normalized
	default:
		return ""
	}
}

// reloadSystemSettings loads system settings from disk and caches them
func (r *Router) reloadSystemSettings() {
	r.settingsMu.Lock()
	defer r.settingsMu.Unlock()

	// Load from disk
	if systemSettings, err := r.persistence.LoadSystemSettings(); err == nil && systemSettings != nil {
		r.cachedAllowEmbedding = systemSettings.AllowEmbedding
		r.cachedAllowedOrigins = systemSettings.AllowedEmbedOrigins

		// Update HideLocalLogin so it takes effect immediately without restart,
		// but respect an environment variable override if present.
		if !r.config.EnvOverrides["PULSE_AUTH_HIDE_LOCAL_LOGIN"] {
			r.config.HideLocalLogin = systemSettings.HideLocalLogin
		}

		// Update webhook allowed private CIDRs in notification manager
		if r.monitor != nil {
			if nm := r.monitor.GetNotificationManager(); nm != nil {
				if err := nm.UpdateAllowedPrivateCIDRs(systemSettings.WebhookAllowedPrivateCIDRs); err != nil {
					log.Error().Err(err).Msg("Failed to update webhook allowed private CIDRs during settings reload")
				}
			}
		}
	} else {
		// On error, use safe defaults
		r.cachedAllowEmbedding = false
		r.cachedAllowedOrigins = ""
	}
}

// ServeHTTP implements http.Handler
func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	// Prevent path traversal attacks.
	// We strictly block ".." to prevent directory traversal.
	if strings.Contains(req.URL.Path, "..") {
		// Return 401 for API paths to match expected test behavior
		if strings.HasPrefix(req.URL.Path, "/api/") {
			http.Error(w, "Unauthorized", http.StatusUnauthorized)
		} else {
			http.Error(w, "Invalid path", http.StatusBadRequest)
		}
		log.Warn().
			Str("ip", req.RemoteAddr).
			Str("path", req.URL.Path).
			Msg("Path traversal attempt blocked")
		return
	}

	// Get cached system settings (loaded once at startup, not from disk every request)
	r.capturePublicURLFromRequest(req)
	r.settingsMu.RLock()
	allowEmbedding := r.cachedAllowEmbedding
	allowedEmbedOrigins := r.cachedAllowedOrigins
	r.settingsMu.RUnlock()

	// Apply security headers with embedding configuration
	SecurityHeadersWithConfig(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		// Prevent caching of API responses that may contain sensitive data
		// (auth state, infrastructure topology, tokens, settings).
		// Static assets and HTML have their own cache headers set elsewhere.
		if strings.HasPrefix(req.URL.Path, "/api/") {
			w.Header().Set("Cache-Control", "no-store")
		}

		// Add CORS headers if configured
		if r.config.AllowedOrigins != "" {
			reqOrigin := req.Header.Get("Origin")
			allowedOrigin := ""

			if r.config.AllowedOrigins == "*" {
				allowedOrigin = "*"
			} else if reqOrigin != "" {
				// Parse comma-separated origins and check for match
				origins := strings.Split(r.config.AllowedOrigins, ",")
				for _, o := range origins {
					o = strings.TrimSpace(o)
					if o == "" {
						continue
					}
					if o == reqOrigin {
						allowedOrigin = o
						break
					}
				}
			} else {
				// No Origin header: same-origin or non-browser (e.g. curl).
				// CORS headers are only meaningful when a browser sends an Origin,
				// so skip setting any CORS headers for these requests.
			}

			if allowedOrigin != "" {
				w.Header().Set("Access-Control-Allow-Origin", allowedOrigin)
				w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
				w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Token, X-CSRF-Token, X-Setup-Token")
				w.Header().Set("Access-Control-Expose-Headers", "X-CSRF-Token, X-Authenticated-User, X-Auth-Method")
				// Allow credentials when origin is specific (not *)
				if allowedOrigin != "*" {
					w.Header().Set("Access-Control-Allow-Credentials", "true")
					// Must add Vary: Origin when Origin is used to decide the response
					w.Header().Add("Vary", "Origin")
				}
				// Cache preflight results for 1 hour (only meaningful on OPTIONS).
				if req.Method == "OPTIONS" {
					w.Header().Set("Access-Control-Max-Age", "3600")
				}
			}
		}

		// Handle preflight requests
		if req.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}

		// Check if we need authentication
		needsAuth := true
		clientIP := GetClientIP(req)

		// Recovery mechanism: check if recovery mode is enabled
		recoveryFile := filepath.Join(r.config.DataPath, ".auth_recovery")
		if _, err := os.Stat(recoveryFile); err == nil {
			// Recovery mode is enabled - allow local access only
			log.Debug().
				Str("recovery_file", recoveryFile).
				Str("client_ip", clientIP).
				Str("remote_addr", req.RemoteAddr).
				Str("path", req.URL.Path).
				Bool("file_exists", err == nil).
				Msg("Checking auth recovery mode")
			if isDirectLoopbackRequest(req) {
				log.Warn().
					Str("recovery_file", recoveryFile).
					Str("client_ip", clientIP).
					Msg("AUTH RECOVERY MODE: Allowing local access without authentication")
				// Allow access but add a warning header
				w.Header().Set("X-Auth-Recovery", "true")
				// Recovery mode bypasses auth for localhost
				needsAuth = false
			}
		}

		if needsAuth {
			// Normal authentication check.
			// Normalize path to handle double slashes (e.g., //download -> /download);
			// this prevents auth bypass failures when URLs have trailing slashes.
			normalizedPath := path.Clean(req.URL.Path)

			// Skip auth for certain public endpoints and static assets
			publicPaths := []string{
				"/api/health",
				"/api/security/status",
				"/api/security/validate-bootstrap-token",
				"/api/security/quick-setup",       // Handler does its own auth (bootstrap token or session)
				"/api/version",
				"/api/login",                      // Login endpoint must be public
				"/api/public/signup",              // Hosted mode: public signup
				"/api/public/magic-link/request",  // Hosted mode: request magic link
				"/api/public/magic-link/verify",   // Hosted mode: verify magic link
				"/api/cloud/handoff/exchange",     // Hosted mode: control-plane workspace handoff (token-authenticated)
				"/api/webhooks/stripe",            // Hosted mode: Stripe webhook (signature verification is auth)
				"/install.sh",                     // Unified agent installer
				"/install.ps1",                    // Unified agent Windows installer
				"/download/pulse-agent",           // Unified agent binary
				"/api/agent/version",              // Agent update checks need to work before auth
				"/api/agent/ws",                   // Agent WebSocket has its own auth via registration
				"/api/server/info",                // Server info for installer script
				"/api/ai/oauth/callback",          // OAuth callback from Anthropic for Claude subscription auth
				"/auth/cloud-handoff",             // Cloud control plane handoff (token-authenticated)
				"/auth/trial-activate",            // Hosted trial signup callback (token-authenticated)
				"/auth/license-purchase-activate", // Self-hosted checkout return (session-authenticated via commercial backend)
			}

			// Also allow static assets without auth (JS, CSS, etc);
			// these MUST be accessible for the login page to work.
			// Frontend routes (non-API, non-download) should also be public
			// because authentication is handled by the frontend after page load.
			isFrontendRoute := !strings.HasPrefix(req.URL.Path, "/api/") &&
				!strings.HasPrefix(req.URL.Path, "/ws") &&
				!strings.HasPrefix(req.URL.Path, "/download/") &&
				req.URL.Path != "/simple-stats" &&
				req.URL.Path != "/install.sh" &&
				req.URL.Path != "/install.ps1"

			isStaticAsset := strings.HasPrefix(req.URL.Path, "/assets/") ||
				strings.HasPrefix(req.URL.Path, "/@vite/") ||
				strings.HasPrefix(req.URL.Path, "/@solid-refresh") ||
				strings.HasPrefix(req.URL.Path, "/src/") ||
				strings.HasPrefix(req.URL.Path, "/node_modules/") ||
				req.URL.Path == "/" ||
				req.URL.Path == "/index.html" ||
				req.URL.Path == "/favicon.ico" ||
				req.URL.Path == "/logo.svg" ||
				strings.HasSuffix(req.URL.Path, ".js") ||
				strings.HasSuffix(req.URL.Path, ".css") ||
				strings.HasSuffix(req.URL.Path, ".map") ||
				strings.HasSuffix(req.URL.Path, ".ts") ||
				strings.HasSuffix(req.URL.Path, ".tsx") ||
				strings.HasSuffix(req.URL.Path, ".mjs") ||
				strings.HasSuffix(req.URL.Path, ".jsx")

			isPublic := isStaticAsset || isFrontendRoute
			for _, path := range publicPaths {
				if normalizedPath == path {
					isPublic = true
					break
				}
			}

			// Per-provider SSO OIDC routes are public (login initiation + callback)
			if strings.HasPrefix(normalizedPath, "/api/oidc/") {
				oidcParts := strings.Split(strings.TrimPrefix(normalizedPath, "/"), "/")
				if len(oidcParts) >= 4 && (oidcParts[3] == "login" || oidcParts[3] == "callback") {
					isPublic = true
				}
			}

			// Per-provider SSO SAML routes are public (login, ACS, metadata, SLO)
			if strings.HasPrefix(normalizedPath, "/api/saml/") {
				samlParts := strings.Split(strings.TrimPrefix(normalizedPath, "/"), "/")
				if len(samlParts) >= 4 {
					switch samlParts[3] {
					case "login", "acs", "metadata", "slo", "logout":
						isPublic = true
					}
				}
			}

			// Special case: setup-script should be public because it authenticates with setup tokens.
			if normalizedPath == "/api/setup-script" {
				// The script itself prompts for a setup token.
				isPublic = true
			}

			// Allow temperature verification endpoint when a setup token is provided
			if normalizedPath == "/api/system/verify-temperature-ssh" && r.configHandlers != nil {
				if r.isValidSetupTokenForRequest(req) {
					isPublic = true
				}
			}

			// Allow SSH config endpoint when a setup token is provided
			if normalizedPath == "/api/system/ssh-config" && r.configHandlers != nil {
				if r.isValidSetupTokenForRequest(req) {
					isPublic = true
				}
			}

			// Auto-register endpoint needs to be public (validates tokens internally),
			// but the tokens must be generated by authenticated users via setup-script-url.
			if normalizedPath == "/api/auto-register" {
				isPublic = true
			}

			// Dev mode bypass for admin endpoints (disabled by default)
			if adminBypassEnabled() {
				log.Debug().
					Str("path", req.URL.Path).
					Msg("Admin bypass enabled - skipping global auth")
				needsAuth = false
			}

			// Check auth for protected routes (only if auth is needed)
			if needsAuth && !isPublic && !CheckAuth(r.config, w, req) {
				// Never send WWW-Authenticate - use custom login page.
				// For API requests, return JSON.
				if strings.HasPrefix(req.URL.Path, "/api/") || strings.Contains(req.Header.Get("Accept"), "application/json") {
					w.Header().Set("Content-Type", "application/json")
					w.WriteHeader(http.StatusUnauthorized)
					w.Write([]byte(`{"error":"Authentication required"}`))
				} else {
					http.Error(w, "Authentication required", http.StatusUnauthorized)
				}
				log.Warn().
					Str("ip", req.RemoteAddr).
					Str("path", req.URL.Path).
					Msg("Unauthorized access attempt")
				return
			}
		}
		// Check CSRF for state-changing requests.
		// CSRF is only needed when using session-based auth.
		skipCSRF := false
		// Quick setup can run before auth exists. Keep bootstrap/recovery flows usable
		// without a prior session+CSRF pair, but enforce CSRF once auth is configured.
		authConfigured := (r.config.AuthUser != "" && r.config.AuthPass != "") ||
			r.config.HasAPITokens() ||
			r.config.ProxyAuthSecret != "" ||
			(func() bool {
				ssoCfg := r.ensureSSOConfig()
				return ssoCfg != nil && ssoCfg.HasEnabledProviders()
			})()
		validRecoveryToken := false
		if recoveryToken := strings.TrimSpace(req.Header.Get("X-Recovery-Token")); recoveryToken != "" {
			validRecoveryToken = GetRecoveryTokenStore().IsRecoveryTokenValidConstantTime(recoveryToken)
		}
		if req.URL.Path == "/api/security/quick-setup" &&
			(!authConfigured || validRecoveryToken) {
			skipCSRF = true
		}
		// Skip CSRF for setup-script-url endpoint (generates temporary tokens, not a state change)
		if req.URL.Path == "/api/setup-script-url" {
			skipCSRF = true
		}
		// Skip CSRF for bootstrap token validation (used during initial setup before session exists)
		if req.URL.Path == "/api/security/validate-bootstrap-token" {
			skipCSRF = true
		}
		// Skip CSRF for login to avoid blocking re-auth when a stale session cookie exists.
		if req.URL.Path == "/api/login" {
			skipCSRF = true
		}
		// Skip CSRF for SSO login/callback endpoints (OIDC and SAML)
		if strings.HasPrefix(req.URL.Path, "/api/oidc/") || strings.HasPrefix(req.URL.Path, "/api/saml/") {
			skipCSRF = true
		}
		// Skip CSRF for hosted public endpoints (may be called without a session or with a stale cookie).
		if req.URL.Path == "/api/public/signup" || req.URL.Path == "/api/public/magic-link/request" {
			skipCSRF = true
		}
		// Skip CSRF for cloud handoff (GET with token param, no prior session).
		if req.URL.Path == "/auth/cloud-handoff" {
			skipCSRF = true
		}
		// Skip CSRF for hosted trial activation callback (GET with signed token).
		if req.URL.Path == "/auth/trial-activate" {
			skipCSRF = true
		}
		// Skip CSRF for self-hosted checkout activation return (POST from Pulse Account, no prior session required).
		if req.URL.Path == "/auth/license-purchase-activate" {
			skipCSRF = true
		}
		// Skip CSRF for control-plane workspace handoff exchange (POST with signed handoff token).
		if req.URL.Path == "/api/cloud/handoff/exchange" {
			skipCSRF = true
		}
		if strings.HasPrefix(req.URL.Path, "/api/") && !skipCSRF && isValidProxyAuthRequest(r.config, req) && isCrossSiteBrowserRequest(req) {
			http.Error(w, "CSRF origin validation failed", http.StatusForbidden)
			LogAuditEventForTenant(GetOrgID(req.Context()), "csrf_failure", "", GetClientIP(req), req.URL.Path, false, "Cross-site browser mutation blocked for proxy auth")
			return
		}
		if strings.HasPrefix(req.URL.Path, "/api/") && !skipCSRF && !CheckCSRF(w, req) {
			http.Error(w, "CSRF token validation failed", http.StatusForbidden)
			LogAuditEventForTenant(GetOrgID(req.Context()), "csrf_failure", "", GetClientIP(req), req.URL.Path, false, "Invalid CSRF token")
			return
		}

		// Issue CSRF token for GET requests if session exists but CSRF cookie is missing.
		// This ensures the frontend has a token before making POST requests.
		if req.Method == "GET" && strings.HasPrefix(req.URL.Path, "/api/") {
			sessionCookie, err := readSessionCookie(req)
			if err == nil && sessionCookie.Value != "" {
				// Check if CSRF cookie exists
				_, csrfErr := req.Cookie(CookieNameCSRF)
				if csrfErr != nil {
					// Session exists but no CSRF cookie - issue one
					csrfToken := generateCSRFToken(sessionCookie.Value)
					isSecure, sameSitePolicy := getCookieSettings(req)
					http.SetCookie(w, &http.Cookie{
						Name:     CookieNameCSRF,
						Value:    csrfToken,
						Path:     "/",
						Secure:   isSecure,
						SameSite: sameSitePolicy,
						MaxAge:   86400,
					})
				}
			}
		}

		// Rate limiting is now handled by UniversalRateLimitMiddleware;
		// no need for duplicate rate limiting logic here.

		// Log request
		start := time.Now()

		// Fix for issue #334: custom routing to prevent ServeMux's "./" redirect.
		// When accessing without trailing slash, ServeMux redirects to "./", which is wrong,
		// so we handle routing manually to avoid this issue.

		// Check if this is an API or WebSocket route
		log.Debug().Str("path", req.URL.Path).Msg("Routing request")

		if strings.HasPrefix(req.URL.Path, "/api/") ||
			strings.HasPrefix(req.URL.Path, "/ws") ||
			strings.HasPrefix(req.URL.Path, "/download/") ||
			strings.HasPrefix(req.URL.Path, "/auth/") ||
			strings.HasPrefix(req.URL.Path, "/debug/pprof") ||
			req.URL.Path == "/simple-stats" ||
			path.Clean(req.URL.Path) == "/install.sh" ||
			path.Clean(req.URL.Path) == "/install.ps1" {
			// Use the mux for API and special routes
			r.mux.ServeHTTP(w, req)
		} else {
			// Serve frontend for all other paths (including root)
			handler := serveFrontendHandler()
			handler(w, req)
		}

		log.Debug().
			Str("method", req.Method).
			Str("path", req.URL.Path).
			Dur("duration", time.Since(start)).
			Msg("Request handled")
	}), allowEmbedding, allowedEmbedOrigins, utils.GetenvTrim("FRONTEND_DEV_SERVER") != "").ServeHTTP(w, req)
}
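
// A minimal sketch of the CORS behavior above (the hostnames are illustrative,
// and how r.config.AllowedOrigins gets populated is outside this file):
//
//	curl -i -X OPTIONS https://pulse.example/api/state \
//	    -H 'Origin: https://dash.example' \
//	    -H 'Access-Control-Request-Method: GET'
//
// When https://dash.example is in the configured list, the response carries
// Access-Control-Allow-Origin, Allow-Credentials (for a non-* origin), and
// Access-Control-Max-Age: 3600; the preflight OPTIONS returns 200 before any
// auth check runs.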

func (r *Router) capturePublicURLFromRequest(req *http.Request) {
	if req == nil || r == nil || r.config == nil {
		return
	}

	// Hosted mode must never derive a "public" URL from inbound requests.
	// It is too easy to abuse Host / forwarded headers and poison config.
	if r.hostedMode {
		return
	}

	if !canCapturePublicURL(r.config, req) {
		return
	}

	if r.config.EnvOverrides != nil && r.config.EnvOverrides["publicURL"] {
		return
	}

	peerIP := extractRemoteIP(req.RemoteAddr)
	trustedProxy := isTrustedProxyIP(peerIP)

	rawHost := ""
	if trustedProxy {
		rawHost = firstForwardedValue(req.Header.Get("X-Forwarded-Host"))
	}
	if rawHost == "" {
		rawHost = req.Host
	}
	hostWithPort, hostOnly := sanitizeForwardedHost(rawHost)
	if hostWithPort == "" {
		return
	}
	if isLoopbackHost(hostOnly) {
		return
	}

	rawProto := ""
	if trustedProxy {
		rawProto = firstForwardedValue(req.Header.Get("X-Forwarded-Proto"))
		if rawProto == "" {
			rawProto = firstForwardedValue(req.Header.Get("X-Forwarded-Scheme"))
		}
	}
	scheme := strings.ToLower(strings.TrimSpace(rawProto))
	switch scheme {
	case "https", "http":
		// supported values
	default:
		if req.TLS != nil {
			scheme = "https"
		} else {
			scheme = "http"
		}
	}
	if scheme == "" {
		scheme = "http"
	}

	if _, _, err := net.SplitHostPort(hostWithPort); err != nil {
		if forwardedPort := firstForwardedValue(req.Header.Get("X-Forwarded-Port")); forwardedPort != "" {
			if shouldAppendForwardedPort(forwardedPort, scheme) {
				if strings.Contains(hostWithPort, ":") && !strings.HasPrefix(hostWithPort, "[") {
					hostWithPort = fmt.Sprintf("[%s]", hostWithPort)
				} else if strings.HasPrefix(hostWithPort, "[") && !strings.Contains(hostWithPort, "]") {
					hostWithPort = fmt.Sprintf("[%s]", strings.TrimPrefix(hostWithPort, "["))
				}
				hostWithPort = fmt.Sprintf("%s:%s", hostWithPort, forwardedPort)
			}
		}
	}

	candidate := fmt.Sprintf("%s://%s", scheme, hostWithPort)
	normalizedCandidate := strings.TrimRight(strings.TrimSpace(candidate), "/")

	r.publicURLMu.Lock()
	if r.publicURLDetected {
		r.publicURLMu.Unlock()
		return
	}

	current := strings.TrimRight(strings.TrimSpace(r.config.PublicURL), "/")
	if current != "" {
		// If explicitly configured, never overwrite from request
		r.publicURLDetected = true
		r.publicURLMu.Unlock()
		return
	}

	r.config.PublicURL = normalizedCandidate
	r.publicURLDetected = true
	r.publicURLMu.Unlock()

	log.Info().
		Str("publicURL", normalizedCandidate).
		Msg("Detected public URL from inbound request; using for notifications")

	if r.monitor != nil {
		if mgr := r.monitor.GetNotificationManager(); mgr != nil {
			mgr.SetPublicURL(normalizedCandidate)
		}
	}
}
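
// Worked example of the derivation above, assuming the request arrives via a
// trusted proxy (header values are illustrative):
//
//	X-Forwarded-Host:  pulse.example.com
//	X-Forwarded-Proto: https
//	X-Forwarded-Port:  8443
//
// yields the candidate "https://pulse.example.com:8443". The port is appended
// because it is not the scheme default; 443 would be dropped by
// shouldAppendForwardedPort. Loopback hosts and hosted mode never capture.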

func firstForwardedValue(header string) string {
	if header == "" {
		return ""
	}
	parts := strings.Split(header, ",")
	return strings.TrimSpace(parts[0])
}

func sanitizeForwardedHost(raw string) (string, string) {
	host := strings.TrimSpace(raw)
	if host == "" {
		return "", ""
	}

	host = strings.TrimPrefix(host, "http://")
	host = strings.TrimPrefix(host, "https://")
	host = strings.TrimSpace(strings.TrimSuffix(host, "/"))
	if host == "" {
		return "", ""
	}

	hostOnly := host
	if h, _, err := net.SplitHostPort(hostOnly); err == nil {
		hostOnly = h
	}
	hostOnly = strings.Trim(hostOnly, "[]")

	return host, hostOnly
}
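
// Sample inputs and outputs for sanitizeForwardedHost (illustrative values):
//
//	"https://pulse.example.com/" -> ("pulse.example.com", "pulse.example.com")
//	"pulse.example.com:8443"     -> ("pulse.example.com:8443", "pulse.example.com")
//	"[2001:db8::1]:443"          -> ("[2001:db8::1]:443", "2001:db8::1")
//	"   "                        -> ("", "")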

func isLoopbackHost(host string) bool {
	if host == "" {
		return true
	}
	lower := strings.ToLower(host)
	if lower == "localhost" {
		return true
	}
	if ip := net.ParseIP(host); ip != nil {
		if ip.IsLoopback() || ip.IsUnspecified() {
			return true
		}
	}
	return false
}

func shouldAppendForwardedPort(port, scheme string) bool {
	if port == "" {
		return false
	}
	if _, err := strconv.Atoi(port); err != nil {
		return false
	}
	if scheme == "https" && port == "443" {
		return false
	}
	if scheme == "http" && port == "80" {
		return false
	}
	return true
}
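
// Quick reference for shouldAppendForwardedPort (illustrative):
//
//	("8443", "https") -> true   // non-default port, keep it
//	("443", "https")  -> false  // scheme default, drop it
//	("80", "http")    -> false  // scheme default, drop it
//	("abc", "https")  -> false  // not numeric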

func isValidProxyAuthRequest(cfg *config.Config, req *http.Request) bool {
	if cfg == nil || req == nil || cfg.ProxyAuthSecret == "" {
		return false
	}
	if strings.TrimSpace(req.Header.Get("X-Proxy-Secret")) == "" {
		return false
	}
	valid, _, _ := CheckProxyAuth(cfg, req)
	return valid
}

func requestOrigin(req *http.Request) string {
	if req == nil {
		return ""
	}
	host := strings.TrimSpace(req.Host)
	if host == "" {
		return ""
	}

	scheme := "http"
	if isConnectionSecure(req) {
		scheme = "https"
	}
	return scheme + "://" + host
}

func canonicalOrigin(raw string) (scheme, host, port string, ok bool) {
	u, err := url.Parse(strings.TrimSpace(raw))
	if err != nil || u == nil {
		return "", "", "", false
	}

	scheme = strings.ToLower(strings.TrimSpace(u.Scheme))
	host = strings.ToLower(strings.TrimSpace(u.Hostname()))
	port = strings.TrimSpace(u.Port())
	if scheme == "" || host == "" {
		return "", "", "", false
	}
	if port == "" {
		switch scheme {
		case "https":
			port = "443"
		case "http":
			port = "80"
		}
	}
	return scheme, host, port, true
}

func sameOrigin(left, right string) bool {
	schemeL, hostL, portL, okL := canonicalOrigin(left)
	schemeR, hostR, portR, okR := canonicalOrigin(right)
	if !okL || !okR {
		return false
	}
	return schemeL == schemeR && hostL == hostR && portL == portR
}
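
// Because canonicalOrigin fills in default ports, sameOrigin treats an
// explicit default port as equal to an implicit one (illustrative hosts):
//
//	sameOrigin("https://a.example", "https://a.example:443") // true
//	sameOrigin("https://a.example", "http://a.example")      // false (scheme and port differ)
//	sameOrigin("https://a.example", "https://b.example")     // false (host differs)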

// isCrossSiteBrowserRequest detects browser-originated cross-site requests.
// It is used as an additional safeguard for sessionless proxy-auth flows.
func isCrossSiteBrowserRequest(req *http.Request) bool {
	if req == nil {
		return false
	}

	switch strings.ToLower(strings.TrimSpace(req.Header.Get("Sec-Fetch-Site"))) {
	case "cross-site":
		return true
	case "same-origin", "same-site", "none":
		return false
	}

	expected := requestOrigin(req)
	if expected == "" {
		return false
	}

	if origin := strings.TrimSpace(req.Header.Get("Origin")); origin != "" {
		if strings.EqualFold(origin, "null") {
			return true
		}
		return !sameOrigin(origin, expected)
	}

	if referer := strings.TrimSpace(req.Header.Get("Referer")); referer != "" {
		return !sameOrigin(referer, expected)
	}

	// Allow non-browser or legacy clients with neither Origin nor Referer.
	return false
}
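
// Decision order sketch for isCrossSiteBrowserRequest: Sec-Fetch-Site is
// authoritative when present, then Origin, then Referer; a request carrying
// none of the three is allowed as a presumed non-browser client. Cases:
//
//	Sec-Fetch-Site: cross-site               -> true (blocked)
//	Origin: null                             -> true (opaque origin)
//	Origin matching the request's own origin -> false
//	no Sec-Fetch-Site / Origin / Referer     -> false (curl-style client)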

func canCapturePublicURL(cfg *config.Config, req *http.Request) bool {
	if cfg == nil || req == nil {
		return false
	}

	// Proxy Auth: require admin
	if cfg.ProxyAuthSecret != "" {
		if valid, _, isAdmin := CheckProxyAuth(cfg, req); valid && isAdmin {
			return true
		}
	}

	// API Tokens: require settings:write scope
	if cfg.HasAPITokens() {
		if token := strings.TrimSpace(req.Header.Get("X-API-Token")); token != "" {
			if record, ok := cfg.ValidateAPIToken(token); ok && record.HasScope(config.ScopeSettingsWrite) {
				return true
			}
		}
		if authHeader := strings.TrimSpace(req.Header.Get("Authorization")); strings.HasPrefix(strings.ToLower(authHeader), "bearer ") {
			if record, ok := cfg.ValidateAPIToken(strings.TrimSpace(authHeader[7:])); ok && record.HasScope(config.ScopeSettingsWrite) {
				return true
			}
		}
	}

	// Session (browser): allow capture only for the configured local admin session.
	// This prevents low-privilege session users from poisoning public URL auto-detection.
	if cookie, err := readSessionCookie(req); err == nil && cookie.Value != "" {
		if ValidateSession(cookie.Value) {
			adminUser := strings.TrimSpace(cfg.AuthUser)
			if adminUser != "" {
				username := strings.TrimSpace(GetSessionUsername(cookie.Value))
				if strings.EqualFold(username, adminUser) {
					return true
				}
			}
		}
	}

	// Basic Auth: trusted (admin)
	if cfg.AuthUser != "" && cfg.AuthPass != "" {
		const prefix = "Basic "
		if authHeader := req.Header.Get("Authorization"); strings.HasPrefix(authHeader, prefix) {
			if decoded, err := base64.StdEncoding.DecodeString(authHeader[len(prefix):]); err == nil {
				if parts := strings.SplitN(string(decoded), ":", 2); len(parts) == 2 {
					if parts[0] == cfg.AuthUser && internalauth.CheckPasswordHash(parts[1], cfg.AuthPass) {
						return true
					}
				}
			}
		}
	}

	return false
}

// handleHealth handles health check requests
func (r *Router) handleHealth(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	monitorHealthy := r.monitor != nil
	schedulerHealthy := false
	if monitorHealthy {
		schedulerHealthy = r.monitor.SchedulerHealth().DeadLetter.Count == 0
	}

	statusCode := http.StatusOK
	status := "healthy"
	if !monitorHealthy || !schedulerHealthy {
		statusCode = http.StatusServiceUnavailable
		status = "unhealthy"
	}

	uptimeSeconds := 0.0
	if monitorHealthy {
		uptimeSeconds = time.Since(r.monitor.GetStartTime()).Seconds()
	}

	response := EmptyHealthResponse()
	response.Status = status
	response.Timestamp = time.Now().Unix()
	response.Uptime = uptimeSeconds
	response.ProxyInstallScriptAvailable = true
	response.DevModeSSH = os.Getenv("PULSE_DEV_ALLOW_CONTAINER_SSH") == "true"
	response.Dependencies = map[string]bool{
		"monitor":   monitorHealthy,
		"scheduler": schedulerHealthy,
		"websocket": r.wsHub != nil,
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(statusCode)
	if err := utils.WriteJSONResponse(w, response); err != nil {
		log.Error().Err(err).Msg("Failed to write health response")
	}
}
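
// Hedged probe sketch for this endpoint (the port is an assumption for
// illustration; the exact JSON keys come from the HealthResponse struct tags,
// which are defined elsewhere):
//
//	curl -s http://localhost:7655/api/health
//
// Expect 200 with status "healthy" when both monitor and scheduler are up,
// and 503 with status "unhealthy" when either dependency is down.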

// handleSchedulerHealth returns scheduler health status for adaptive polling
func (r *Router) handleSchedulerHealth(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	if r.monitor == nil {
		http.Error(w, "Monitor not available", http.StatusServiceUnavailable)
		return
	}

	health := r.monitor.SchedulerHealth()
	if err := utils.WriteJSONResponse(w, health); err != nil {
		log.Error().Err(err).Msg("Failed to write scheduler health response")
	}
}

// handleChangePassword handles password change requests
func (r *Router) handleChangePassword(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodPost {
		writeErrorResponse(w, http.StatusMethodNotAllowed, "method_not_allowed",
			"Only POST method is allowed", nil)
		return
	}

	// SECURITY: Require authentication before allowing password change attempts.
	// This prevents brute-force attacks on the current password.
	if !CheckAuth(r.config, w, req) {
		log.Warn().
			Str("ip", req.RemoteAddr).
			Str("path", req.URL.Path).
			Msg("Unauthenticated password change attempt blocked")
		// CheckAuth already wrote the error response
		return
	}

	// Apply rate limiting to password change attempts to prevent brute-force
	clientIP := GetClientIP(req)
	if !authLimiter.Allow(clientIP) {
		log.Warn().
			Str("ip", clientIP).
			Msg("Rate limit exceeded for password change")
		writeErrorResponse(w, http.StatusTooManyRequests, "rate_limited",
			"Too many password change attempts. Please try again later.", nil)
		return
	}

	// Check lockout status for the client IP
	_, lockedUntil, isLocked := GetLockoutInfo(clientIP)
	if isLocked {
		remainingMinutes := int(time.Until(lockedUntil).Minutes())
		if remainingMinutes < 1 {
			remainingMinutes = 1
		}
		log.Warn().
			Str("ip", clientIP).
			Time("locked_until", lockedUntil).
			Msg("Password change blocked - IP locked out")
		writeErrorResponse(w, http.StatusForbidden, "locked_out",
			fmt.Sprintf("Too many failed attempts. Try again in %d minutes.", remainingMinutes), nil)
		return
	}

	// If using proxy auth, verify admin status
	if r.config.ProxyAuthSecret != "" {
		if valid, username, isAdmin := CheckProxyAuth(r.config, req); valid {
			if !isAdmin {
				// User is authenticated but not an admin
				log.Warn().
					Str("ip", req.RemoteAddr).
					Str("path", req.URL.Path).
					Str("method", req.Method).
					Str("username", username).
					Msg("Non-admin user attempted to change password")

				// Return forbidden error
				writeErrorResponse(w, http.StatusForbidden, "forbidden",
					"Admin privileges required", nil)
				return
			}
		}
	}

	// Parse request
	var changeReq struct {
		CurrentPassword string `json:"currentPassword"`
		NewPassword     string `json:"newPassword"`
	}

	if err := json.NewDecoder(req.Body).Decode(&changeReq); err != nil {
		writeErrorResponse(w, http.StatusBadRequest, "invalid_request",
			"Invalid request body", nil)
		return
	}

	// Validate new password complexity
	if err := auth.ValidatePasswordComplexity(changeReq.NewPassword); err != nil {
		writeErrorResponse(w, http.StatusBadRequest, "invalid_password",
			err.Error(), nil)
		return
	}

	// Verify the current password matches. When behind a proxy with Basic Auth,
	// the proxy may overwrite the Authorization header, so we verify the current
	// password from the JSON body instead.

	// First, validate that currentPassword was provided
	if changeReq.CurrentPassword == "" {
		writeErrorResponse(w, http.StatusUnauthorized, "unauthorized",
			"Current password required", nil)
		return
	}

	// Check whether to use the Basic Auth header or the JSON body for verification.
	// If there's an Authorization header AND it's not from a proxy, use it.
	authHeader := req.Header.Get("Authorization")
	useAuthHeader := false
	username := r.config.AuthUser // Default to configured username

	if authHeader != "" {
		const basicPrefix = "Basic "
		if strings.HasPrefix(authHeader, basicPrefix) {
			decoded, err := base64.StdEncoding.DecodeString(authHeader[len(basicPrefix):])
			if err == nil {
				parts := strings.SplitN(string(decoded), ":", 2)
				if len(parts) == 2 {
					// Check if this looks like Pulse credentials (matching username)
					if parts[0] == r.config.AuthUser {
						// This is likely from Pulse's own auth, not a proxy
						username = parts[0]
						useAuthHeader = true
						// Verify the password from the header matches
						if !auth.CheckPasswordHash(parts[1], r.config.AuthPass) {
							log.Warn().
								Str("ip", req.RemoteAddr).
								Str("username", username).
								Msg("Failed password change attempt - incorrect current password in auth header")
							RecordFailedLogin(clientIP)
							writeErrorResponse(w, http.StatusUnauthorized, "unauthorized",
								"Current password is incorrect", nil)
							return
						}
					}
					// If username doesn't match, this is likely proxy auth - ignore it
				}
			}
		}
	}

	// Verify the current password from the JSON body as well. CurrentPassword is
	// required above, so this check also runs as a double-check when the auth
	// header already matched.
	if !useAuthHeader || changeReq.CurrentPassword != "" {
		if !auth.CheckPasswordHash(changeReq.CurrentPassword, r.config.AuthPass) {
			log.Warn().
				Str("ip", req.RemoteAddr).
				Str("username", username).
				Msg("Failed password change attempt - incorrect current password")
			RecordFailedLogin(clientIP)
			writeErrorResponse(w, http.StatusUnauthorized, "unauthorized",
				"Current password is incorrect", nil)
			return
		}
	}

	// Hash the new password before storing
	hashedPassword, err := auth.HashPassword(changeReq.NewPassword)
	if err != nil {
		log.Error().Err(err).Msg("Failed to hash new password")
		writeErrorResponse(w, http.StatusInternalServerError, "hash_error",
			"Failed to process new password", nil)
		return
	}

	// Check if we're running in Docker
	isDocker := os.Getenv("PULSE_DOCKER") == "true"

	if isDocker {
		// For Docker, update the .env file in the data directory
		envPath := resolveAuthEnvPath(r.config.ConfigPath)

		// Read existing .env file to preserve other settings
		envContent := ""
		existingContent, err := os.ReadFile(envPath)
		if err == nil {
			// Parse existing content and update password
			scanner := bufio.NewScanner(strings.NewReader(string(existingContent)))
			for scanner.Scan() {
				line := scanner.Text()
				// Skip empty lines and comments
				if line == "" || strings.HasPrefix(line, "#") {
					envContent += line + "\n"
					continue
				}
				// Update password line, keep others
				if strings.HasPrefix(line, "PULSE_AUTH_PASS=") {
					envContent += fmt.Sprintf("PULSE_AUTH_PASS='%s'\n", hashedPassword)
				} else {
					envContent += line + "\n"
				}
			}
		} else {
			// Create new .env file if it doesn't exist
			envContent = fmt.Sprintf(`# Auto-generated by Pulse password change
# Generated on %s
PULSE_AUTH_USER='%s'
PULSE_AUTH_PASS='%s'
`, time.Now().Format(time.RFC3339), r.config.AuthUser, hashedPassword)
		}

		// Write the updated .env file
		envPath, err = writeAuthEnvFile(r.config.ConfigPath, r.config.DataPath, []byte(envContent))
		if err != nil {
			log.Error().Err(err).Msg("Failed to write .env file")
			writeErrorResponse(w, http.StatusInternalServerError, "config_error",
				"Failed to save new password", nil)
			return
		}

		// Update the running config
		r.config.AuthPass = hashedPassword

		log.Info().Msg("Password changed successfully in Docker environment")

		// Invalidate all sessions
		InvalidateUserSessions(r.config.AuthUser)

		// Audit log
		LogAuditEventForTenant(GetOrgID(req.Context()), "password_change", r.config.AuthUser, GetClientIP(req), req.URL.Path, true, "Password changed (Docker)")

		// Return success with Docker-specific message
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]interface{}{
			"success": true,
			"message": "Password changed successfully. Please restart your Docker container to apply changes.",
		})

	} else {
		// For non-Docker (systemd/manual), save to .env file
		envPath := resolveAuthEnvPath(r.config.ConfigPath)

		// Read existing .env file to preserve other settings
		envContent := ""
		existingContent, err := os.ReadFile(envPath)
		if err == nil {
			// Parse and update existing content
			scanner := bufio.NewScanner(strings.NewReader(string(existingContent)))
			for scanner.Scan() {
				line := scanner.Text()
				if line == "" || strings.HasPrefix(line, "#") {
					envContent += line + "\n"
					continue
				}
				// Update password line, keep others
				if strings.HasPrefix(line, "PULSE_AUTH_PASS=") {
					envContent += fmt.Sprintf("PULSE_AUTH_PASS='%s'\n", hashedPassword)
				} else {
					envContent += line + "\n"
				}
			}
		} else {
			// Create new .env file if it doesn't exist
			envContent = fmt.Sprintf(`# Auto-generated by Pulse password change
# Generated on %s
PULSE_AUTH_USER='%s'
PULSE_AUTH_PASS='%s'
`, time.Now().Format(time.RFC3339), r.config.AuthUser, hashedPassword)
		}

		// Try to write the .env file
		envPath, err = writeAuthEnvFile(r.config.ConfigPath, r.config.DataPath, []byte(envContent))
		if err != nil {
			log.Error().Err(err).Msg("Failed to write .env file")
			writeErrorResponse(w, http.StatusInternalServerError, "config_error",
				"Failed to save new password. You may need to update the password manually.", nil)
			return
		}

		// Update the running config
		r.config.AuthPass = hashedPassword

		log.Info().Msg("Password changed successfully")

		// Invalidate all sessions
		InvalidateUserSessions(r.config.AuthUser)

		// Audit log
		LogAuditEventForTenant(GetOrgID(req.Context()), "password_change", r.config.AuthUser, GetClientIP(req), req.URL.Path, true, "Password changed")

		// Detect service name for restart instructions
		serviceName := detectServiceName()

		// Return success with manual restart instructions
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]interface{}{
			"success":         true,
			"message":         fmt.Sprintf("Password changed. Restart the service to apply: sudo systemctl restart %s", serviceName),
			"requiresRestart": true,
			"serviceName":     serviceName,
		})
	}
}

// handleLogout handles logout requests
func (r *Router) handleLogout(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodPost {
		writeErrorResponse(w, http.StatusMethodNotAllowed, "method_not_allowed",
			"Only POST method is allowed", nil)
		return
	}

	// Get session token from cookie
	var sessionToken string
	if cookie, err := readSessionCookie(req); err == nil {
		sessionToken = cookie.Value
	}

	// Delete the session if it exists
	if sessionToken != "" {
		GetSessionStore().DeleteSession(sessionToken)

		// Also delete CSRF token if it exists
		GetCSRFStore().DeleteCSRFToken(sessionToken)
	}

	// Get appropriate cookie settings based on proxy detection (consistent with login)
	isSecure, sameSitePolicy := getCookieSettings(req)

	// Clear both session cookie variants (prefixed and unprefixed) to ensure
	// a clean logout regardless of how the cookie was originally set.
	for _, name := range []string{cookieNameSession, cookieNameSessionSecure} {
		http.SetCookie(w, &http.Cookie{
			Name:     name,
			Value:    "",
			Path:     "/",
			MaxAge:   -1,
			HttpOnly: true,
			Secure:   isSecure,
			SameSite: sameSitePolicy,
		})
	}

	// Audit log logout (use admin as username since we have a single user for now)
	LogAuditEventForTenant(GetOrgID(req.Context()), "logout", "admin", GetClientIP(req), req.URL.Path, true, "User logged out")

	log.Info().
		Str("user", "admin").
		Str("ip", GetClientIP(req)).
		Msg("User logged out")

	// Return success
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"success": true,
		"message": "Successfully logged out",
	})
}

func (r *Router) establishSession(w http.ResponseWriter, req *http.Request, username string) error {
	// Invalidate any pre-existing session to prevent session fixation attacks.
	InvalidateOldSessionFromRequest(req)

	token := generateSessionToken()
	if token == "" {
		return fmt.Errorf("failed to generate session token")
	}

	userAgent := req.Header.Get("User-Agent")
	clientIP := GetClientIP(req)
	GetSessionStore().CreateSession(token, 24*time.Hour, userAgent, clientIP, username)

	if username != "" {
		TrackUserSession(username, token)
	}

	csrfToken := generateCSRFToken(token)
	isSecure, sameSitePolicy := getCookieSettings(req)

	http.SetCookie(w, &http.Cookie{
		Name:     sessionCookieName(isSecure),
		Value:    token,
		Path:     "/",
		HttpOnly: true,
		Secure:   isSecure,
		SameSite: sameSitePolicy,
		MaxAge:   86400,
	})

	http.SetCookie(w, &http.Cookie{
		Name:     CookieNameCSRF,
		Value:    csrfToken,
		Path:     "/",
		Secure:   isSecure,
		SameSite: sameSitePolicy,
		MaxAge:   86400,
	})

	return nil
}
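
// establishSession implements a double-submit cookie pattern: the session
// cookie is HttpOnly while the CSRF cookie is readable by the frontend, which
// echoes it back on mutations. Hypothetical client-side flow (not from this
// repo):
//
//	1. POST /api/login  -> Set-Cookie: session cookie + CSRF cookie
//	2. read CSRF cookie -> send its value as the X-CSRF-Token header
//	3. POST /api/...    -> CheckCSRF compares header and cookie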

// establishOIDCSession creates a session with OIDC token information for refresh token support
func (r *Router) establishOIDCSession(w http.ResponseWriter, req *http.Request, username string, oidcTokens *OIDCTokenInfo) error {
	// Invalidate any pre-existing session to prevent session fixation attacks.
	InvalidateOldSessionFromRequest(req)

	token := generateSessionToken()
	if token == "" {
		return fmt.Errorf("failed to generate session token")
	}

	userAgent := req.Header.Get("User-Agent")
	clientIP := GetClientIP(req)

	// Create session with OIDC tokens (including username for restart survival)
	GetSessionStore().CreateOIDCSession(token, 24*time.Hour, userAgent, clientIP, username, oidcTokens)

	if username != "" {
		TrackUserSession(username, token)
	}

	csrfToken := generateCSRFToken(token)
	isSecure, sameSitePolicy := getCookieSettings(req)

	http.SetCookie(w, &http.Cookie{
		Name:     sessionCookieName(isSecure),
		Value:    token,
		Path:     "/",
		HttpOnly: true,
		Secure:   isSecure,
		SameSite: sameSitePolicy,
		MaxAge:   86400,
	})

	http.SetCookie(w, &http.Cookie{
		Name:     CookieNameCSRF,
		Value:    csrfToken,
		Path:     "/",
		Secure:   isSecure,
		SameSite: sameSitePolicy,
		MaxAge:   86400,
	})

	return nil
}

// handleLogin handles login requests and provides detailed feedback about lockouts
func (r *Router) handleLogin(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodPost {
		writeErrorResponse(w, http.StatusMethodNotAllowed, "method_not_allowed",
			"Only POST method is allowed", nil)
		return
	}

	// Parse request
	var loginReq struct {
		Username   string `json:"username"`
		Password   string `json:"password"`
		RememberMe bool   `json:"rememberMe"`
	}

	if err := json.NewDecoder(req.Body).Decode(&loginReq); err != nil {
		writeErrorResponse(w, http.StatusBadRequest, "invalid_request",
			"Invalid request body", nil)
		return
	}

	clientIP := GetClientIP(req)

	// Check if account is locked out before attempting login
	_, userLockedUntil, userLocked := GetLockoutInfo(loginReq.Username)
	_, ipLockedUntil, ipLocked := GetLockoutInfo(clientIP)

	if userLocked || ipLocked {
		lockedUntil := userLockedUntil
		if ipLocked && ipLockedUntil.After(lockedUntil) {
			lockedUntil = ipLockedUntil
		}

		remainingMinutes := int(time.Until(lockedUntil).Minutes())
		if remainingMinutes < 1 {
			remainingMinutes = 1
		}

		LogAuditEventForTenant(GetOrgID(req.Context()), "login", loginReq.Username, clientIP, req.URL.Path, false, "Account locked")

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusForbidden)
		json.NewEncoder(w).Encode(map[string]interface{}{
			"error":            "account_locked",
			"message":          fmt.Sprintf("Too many failed attempts. Account is locked for %d more minutes.", remainingMinutes),
			"lockedUntil":      lockedUntil.Format(time.RFC3339),
			"remainingMinutes": remainingMinutes,
		})
		return
	}

	// Check rate limiting
	if !authLimiter.Allow(clientIP) {
		LogAuditEventForTenant(GetOrgID(req.Context()), "login", loginReq.Username, clientIP, req.URL.Path, false, "Rate limited")
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusTooManyRequests)
		json.NewEncoder(w).Encode(map[string]interface{}{
			"error":   "rate_limit",
			"message": "Too many requests. Please wait before trying again.",
		})
		return
	}

	// Verify credentials
	if loginReq.Username == r.config.AuthUser && auth.CheckPasswordHash(loginReq.Password, r.config.AuthPass) {
		// Clear failed login attempts
		ClearFailedLogins(loginReq.Username)
		ClearFailedLogins(clientIP)

		// Invalidate any pre-existing session to prevent session fixation attacks.
		InvalidateOldSessionFromRequest(req)

		// Create session
		token := generateSessionToken()
		if token == "" {
			writeErrorResponse(w, http.StatusInternalServerError, "session_error",
				"Failed to create session", nil)
			return
		}

		// Store session persistently with appropriate duration (including username for restart survival)
		userAgent := req.Header.Get("User-Agent")
		sessionDuration := 24 * time.Hour
		if loginReq.RememberMe {
			sessionDuration = 30 * 24 * time.Hour // 30 days
		}
		GetSessionStore().CreateSession(token, sessionDuration, userAgent, clientIP, loginReq.Username)

		// Track session for user (in-memory for fast lookups)
		TrackUserSession(loginReq.Username, token)

		// Generate CSRF token
		csrfToken := generateCSRFToken(token)

		// Get appropriate cookie settings based on proxy detection
		isSecure, sameSitePolicy := getCookieSettings(req)

		// Set cookie MaxAge to match session duration
		cookieMaxAge := int(sessionDuration.Seconds())

		// Set session cookie
		http.SetCookie(w, &http.Cookie{
			Name:     sessionCookieName(isSecure),
			Value:    token,
			Path:     "/",
			HttpOnly: true,
			Secure:   isSecure,
			SameSite: sameSitePolicy,
			MaxAge:   cookieMaxAge,
		})

		// Set CSRF cookie (not HttpOnly so JS can read it)
		http.SetCookie(w, &http.Cookie{
			Name:     CookieNameCSRF,
			Value:    csrfToken,
			Path:     "/",
			Secure:   isSecure,
			SameSite: sameSitePolicy,
			MaxAge:   cookieMaxAge,
		})

		// Audit log successful login
		LogAuditEventForTenant(GetOrgID(req.Context()), "login", loginReq.Username, clientIP, req.URL.Path, true, "Successful login")

		// Return success
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]interface{}{
			"success": true,
			"message": "Successfully logged in",
		})
	} else {
		// Failed login
		RecordFailedLogin(loginReq.Username)
		RecordFailedLogin(clientIP)
		LogAuditEventForTenant(GetOrgID(req.Context()), "login", loginReq.Username, clientIP, req.URL.Path, false, "Invalid credentials")

		// Get updated attempt counts
		newUserAttempts, _, _ := GetLockoutInfo(loginReq.Username)
		newIPAttempts, _, _ := GetLockoutInfo(clientIP)

		// Use the higher count for the warning
		attempts := newUserAttempts
		if newIPAttempts > attempts {
			attempts = newIPAttempts
		}

		// Prepare response with attempt information
		remaining := maxFailedAttempts - attempts

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusUnauthorized)

		if remaining > 0 {
			json.NewEncoder(w).Encode(map[string]interface{}{
				"error":       "invalid_credentials",
				"message":     fmt.Sprintf("Invalid username or password. You have %d attempts remaining.", remaining),
				"attempts":    attempts,
				"remaining":   remaining,
				"maxAttempts": maxFailedAttempts,
			})
		} else {
			json.NewEncoder(w).Encode(map[string]interface{}{
				"error":           "invalid_credentials",
				"message":         "Invalid username or password. Account is now locked for 15 minutes.",
				"locked":          true,
				"lockoutDuration": "15 minutes",
			})
		}
	}
}
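
// Minimal login sketch (illustrative; the port and credentials are assumptions):
//
//	curl -i -X POST http://localhost:7655/api/login \
//	    -H 'Content-Type: application/json' \
//	    -d '{"username":"admin","password":"...","rememberMe":true}'
//
// On success this sets the session cookie (30-day MaxAge with rememberMe,
// otherwise 24h) plus a CSRF cookie; on failure it reports the remaining
// attempts before the 15-minute lockout engages.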
|
||
|
||
// handleResetLockout allows administrators to manually reset account lockouts
|
||
func (r *Router) handleResetLockout(w http.ResponseWriter, req *http.Request) {
|
||
if req.Method != http.MethodPost {
|
||
writeErrorResponse(w, http.StatusMethodNotAllowed, "method_not_allowed",
|
||
"Only POST method is allowed", nil)
|
||
return
|
||
}
|
||
|
||
// Use RequireAdmin to ensure proper admin checks (including proxy auth) for session users
|
||
RequireAdmin(r.config, func(w http.ResponseWriter, req *http.Request) {
|
||
if !ensureSettingsWriteScope(r.config, w, req) {
|
||
return
|
||
}
|
||
|
||
// Parse request
|
||
var resetReq struct {
|
||
Identifier string `json:"identifier"` // Can be username or IP
|
||
}
|
||
|
||
if err := json.NewDecoder(req.Body).Decode(&resetReq); err != nil {
|
||
writeErrorResponse(w, http.StatusBadRequest, "invalid_request",
|
||
"Invalid request body", nil)
|
||
return
|
||
}
|
||
|
||
if resetReq.Identifier == "" {
|
||
writeErrorResponse(w, http.StatusBadRequest, "missing_identifier",
|
||
"Identifier (username or IP) is required", nil)
|
||
return
|
||
}
|
||
|
||
// Reset the lockout
|
||
ResetLockout(resetReq.Identifier)
|
||
|
||
// Also clear failed login attempts
|
||
ClearFailedLogins(resetReq.Identifier)
|
||
|
||
// Audit log the reset
|
||
LogAuditEventForTenant(GetOrgID(req.Context()), "lockout_reset", "admin", GetClientIP(req), req.URL.Path, true,
|
||
fmt.Sprintf("Lockout reset for: %s", resetReq.Identifier))
|
||
|
||
log.Info().
|
||
Str("identifier", resetReq.Identifier).
|
||
Str("reset_by", "admin").
|
||
Str("ip", GetClientIP(req)).
|
||
Msg("Account lockout manually reset")
|
||
|
||
// Return success
|
||
w.Header().Set("Content-Type", "application/json")
|
||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||
"success": true,
|
||
"message": fmt.Sprintf("Lockout reset for %s", resetReq.Identifier),
|
||
})
|
||
})(w, req)
|
||
}
|
||
|
||
// handleState handles state requests
func (r *Router) handleState(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet {
		writeErrorResponse(w, http.StatusMethodNotAllowed, "method_not_allowed",
			"Only GET method is allowed", nil)
		return
	}

	// Use standard auth check (supports both basic auth and API tokens) unless auth is disabled
	if !CheckAuth(r.config, w, req) {
		writeErrorResponse(w, http.StatusUnauthorized, "unauthorized",
			"Authentication required", nil)
		return
	}

	if record := getAPITokenRecordFromRequest(req); record != nil && !record.HasScope(config.ScopeMonitoringRead) {
		respondMissingScope(w, config.ScopeMonitoringRead)
		return
	}

	// Use tenant-aware monitor to get state for the current organization
	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		writeErrorResponse(w, http.StatusInternalServerError, "no_monitor",
			"Monitor not available", nil)
		return
	}
	frontendState := monitor.BuildFrontendState()

	if err := utils.WriteJSONResponse(w, frontendState); err != nil {
		log.Error().Err(err).Msg("Failed to encode state response")
		writeErrorResponse(w, http.StatusInternalServerError, "encoding_error",
			"Failed to encode state data", nil)
	}
}

// handleVersion handles version requests
func (r *Router) handleVersion(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	versionInfo, err := updates.GetCurrentVersion()
	if err != nil {
		// Fallback to VERSION file
		versionBytes, _ := os.ReadFile("VERSION")
		response := VersionResponse{
			Version:       strings.TrimSpace(string(versionBytes)),
			BuildTime:     "development",
			Build:         "development",
			GoVersion:     runtime.Version(),
			Runtime:       runtime.Version(),
			Channel:       "stable",
			IsDocker:      false,
			IsSourceBuild: false,
			IsDevelopment: true,
		}
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(response)
		return
	}

	// Convert to typed response
	response := VersionResponse{
		Version:        versionInfo.Version,
		BuildTime:      versionInfo.Build,
		Build:          versionInfo.Build,
		GoVersion:      runtime.Version(),
		Runtime:        versionInfo.Runtime,
		Channel:        versionInfo.Channel,
		IsDocker:       versionInfo.IsDocker,
		IsSourceBuild:  versionInfo.IsSourceBuild,
		IsDevelopment:  versionInfo.IsDevelopment,
		DeploymentType: versionInfo.DeploymentType,
	}

	// Detect containerization (LXC/Docker)
	if containerType, err := os.ReadFile("/run/systemd/container"); err == nil {
		response.Containerized = true

		// Try to get container ID from hostname (LXC containers often use CTID as hostname)
		if hostname, err := os.Hostname(); err == nil {
			// For LXC, try to extract numeric ID from hostname or use full hostname
			response.ContainerID = hostname
		}

		// Add container type to deployment type if not already set.
		// /run/systemd/container contents carry a trailing newline, so trim it.
		if response.DeploymentType == "" {
			response.DeploymentType = strings.TrimSpace(string(containerType))
		}
	}

	// Add cached update info if available
	if cachedUpdate := r.updateManager.GetCachedUpdateInfo(); cachedUpdate != nil {
		response.UpdateAvailable = cachedUpdate.Available
		response.LatestVersion = cachedUpdate.LatestVersion
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(response)
}

// handleAgentVersion returns the current server version for agent update checks.
// Agents compare this to their own version to determine if an update is available.
func (r *Router) handleAgentVersion(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Return the server version - all agents should match the server version
	version := "dev"
	if versionInfo, err := updates.GetCurrentVersion(); err == nil {
		version = versionInfo.Version
	}

	response := AgentVersionResponse{
		Version: version,
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(response)
}

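// handleServerInfo reports the server version and whether this is a
// development build as a small JSON payload.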
func (r *Router) handleServerInfo(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	versionInfo, err := updates.GetCurrentVersion()
	isDev := true
	version := "dev"
	if err == nil {
		isDev = versionInfo.IsDevelopment
		version = versionInfo.Version
	}

	response := map[string]interface{}{
		"isDevelopment": isDev,
		"version":       version,
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(response)
}

// handleStorage handles storage detail requests
func (r *Router) handleStorage(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet {
		writeErrorResponse(w, http.StatusMethodNotAllowed, "method_not_allowed",
			"Only GET method is allowed", nil)
		return
	}

	// Extract storage ID from path
	path := strings.TrimPrefix(req.URL.Path, "/api/storage/")
	if path == "" {
		writeErrorResponse(w, http.StatusBadRequest, "missing_storage_id",
			"Storage ID is required", nil)
		return
	}

	// Get tenant-specific monitor and current state
	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		writeErrorResponse(w, http.StatusInternalServerError, "tenant_unavailable", "Tenant monitor is not available", nil)
		return
	}
	// Find the storage by ID
	var storageDetail *models.Storage
	for _, storage := range monitor.StorageSnapshot() {
		if storage.ID == path {
			storageDetail = &storage
			break
		}
	}

	if storageDetail == nil {
		writeErrorResponse(w, http.StatusNotFound, "storage_not_found",
			fmt.Sprintf("Storage with ID '%s' not found", path), nil)
		return
	}

	// Return storage details
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(map[string]interface{}{
		"data":      storageDetail,
		"timestamp": time.Now().Unix(),
	}); err != nil {
		log.Error().Err(err).Str("storage_id", path).Msg("Failed to encode storage details")
		writeErrorResponse(w, http.StatusInternalServerError, "encoding_error",
			"Failed to encode response", nil)
	}
}

// handleCharts handles chart data requests
func (r *Router) handleCharts(w http.ResponseWriter, req *http.Request) {
	log.Debug().Str("method", req.Method).Str("url", req.URL.String()).Msg("Charts endpoint hit")
	const inMemoryChartThreshold = 2 * time.Hour

	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Get time range from query parameters
	query := req.URL.Query()
	timeRange := query.Get("range")
	if timeRange == "" {
		timeRange = "1h"
	}

	// Convert time range to duration.
	duration := parseChartsRangeDuration(timeRange)

	// Get tenant-specific monitor and current state
	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		http.Error(w, "Tenant monitor is not available", http.StatusInternalServerError)
		return
	}
	readState := monitor.GetUnifiedReadStateOrSnapshot()
	if readState == nil {
		http.Error(w, "State unavailable", http.StatusInternalServerError)
		return
	}
	metricsStoreEnabled := monitor.GetMetricsStore() != nil
	primarySourceHint := "memory"
	if metricsStoreEnabled && duration > inMemoryChartThreshold {
		primarySourceHint = "store_or_memory_fallback"
	}

	// Create chart data structure that matches frontend expectations
	chartData := make(map[string]VMChartData)
	nodeData := make(map[string]NodeChartData)

	currentTime := time.Now().UnixMilli() // JavaScript timestamp format
	oldestTimestamp := currentTime

	// Process VMs - batch-load historical data (1-2 SQL calls instead of N).
	vmList := readState.VMs()
	vmRequests := make([]monitoring.GuestChartRequest, 0, len(vmList))
	for _, vm := range vmList {
		if vm == nil {
			continue
		}
		if vid := vm.SourceID(); vid != "" {
			vmRequests = append(vmRequests, monitoring.GuestChartRequest{InMemoryKey: vid, SQLResourceID: vid})
		}
	}
	vmBatchMetrics := monitor.GetGuestMetricsForChartBatch("vm", vmRequests, duration, infrastructureSummaryMetricOrder...)
	for _, vm := range vmList {
		if vm == nil {
			continue
		}
		vid := vm.SourceID()
		if vid == "" {
			continue
		}
		chartData[vid] = make(VMChartData)
		if batchMetrics, ok := vmBatchMetrics[vid]; ok {
			for metricType, points := range batchMetrics {
				if !sparklineMetrics[metricType] {
					continue
				}
				chartData[vid][metricType] = make([]MetricPoint, len(points))
				for i, point := range points {
					ts := point.Timestamp.UnixMilli()
					if ts < oldestTimestamp {
						oldestTimestamp = ts
					}
					chartData[vid][metricType][i] = MetricPoint{
						Timestamp: ts,
						Value:     point.Value,
					}
				}
			}
		}
		if len(chartData[vid]["cpu"]) == 0 {
			chartData[vid]["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: vm.CPUPercent()}}
			chartData[vid]["memory"] = []MetricPoint{{Timestamp: currentTime, Value: vm.MemoryPercent()}}
			chartData[vid]["disk"] = []MetricPoint{{Timestamp: currentTime, Value: vm.DiskPercent()}}
			chartData[vid]["netin"] = []MetricPoint{{Timestamp: currentTime, Value: vm.NetIn()}}
			chartData[vid]["netout"] = []MetricPoint{{Timestamp: currentTime, Value: vm.NetOut()}}
		}
	}

	// Process Containers - batch-load historical data (1-2 SQL calls instead of N).
	ctList := readState.Containers()
	ctRequests := make([]monitoring.GuestChartRequest, 0, len(ctList))
	for _, ct := range ctList {
		if ct == nil {
			continue
		}
		if cid := ct.SourceID(); cid != "" {
			ctRequests = append(ctRequests, monitoring.GuestChartRequest{InMemoryKey: cid, SQLResourceID: cid})
		}
	}
	ctBatchMetrics := monitor.GetGuestMetricsForChartBatch("container", ctRequests, duration, infrastructureSummaryMetricOrder...)
	for _, ct := range ctList {
		if ct == nil {
			continue
		}
		cid := ct.SourceID()
		if cid == "" {
			continue
		}
		chartData[cid] = make(VMChartData)
		if batchMetrics, ok := ctBatchMetrics[cid]; ok {
			for metricType, points := range batchMetrics {
				if !sparklineMetrics[metricType] {
					continue
				}
				chartData[cid][metricType] = make([]MetricPoint, len(points))
				for i, point := range points {
					ts := point.Timestamp.UnixMilli()
					if ts < oldestTimestamp {
						oldestTimestamp = ts
					}
					chartData[cid][metricType][i] = MetricPoint{
						Timestamp: ts,
						Value:     point.Value,
					}
				}
			}
		}
		if len(chartData[cid]["cpu"]) == 0 {
			chartData[cid]["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: ct.CPUPercent()}}
			chartData[cid]["memory"] = []MetricPoint{{Timestamp: currentTime, Value: ct.MemoryPercent()}}
			chartData[cid]["disk"] = []MetricPoint{{Timestamp: currentTime, Value: ct.DiskPercent()}}
			chartData[cid]["netin"] = []MetricPoint{{Timestamp: currentTime, Value: ct.NetIn()}}
			chartData[cid]["netout"] = []MetricPoint{{Timestamp: currentTime, Value: ct.NetOut()}}
		}
	}

	// Process Storage - batch-load historical data (1-2 SQL calls instead of N).
	storageData := make(map[string]StorageChartData)
	spList := readState.StoragePools()
	storageIDs := make([]string, 0, len(spList))
	for _, sp := range spList {
		if sp == nil {
			continue
		}
		if sid := sp.SourceID(); sid != "" {
			storageIDs = append(storageIDs, sid)
		}
	}
	storageBatchMetrics := monitor.GetStorageMetricsForChartBatch(storageIDs, duration)
	for _, sp := range spList {
		if sp == nil {
			continue
		}
		sid := sp.SourceID()
		if sid == "" {
			continue
		}
		storageData[sid] = make(StorageChartData)
		if batchMetrics, ok := storageBatchMetrics[sid]; ok {
			if usagePoints, found := batchMetrics["usage"]; found && len(usagePoints) > 0 {
				storageData[sid]["disk"] = make([]MetricPoint, len(usagePoints))
				for i, point := range usagePoints {
					ts := point.Timestamp.UnixMilli()
					if ts < oldestTimestamp {
						oldestTimestamp = ts
					}
					storageData[sid]["disk"][i] = MetricPoint{
						Timestamp: ts,
						Value:     point.Value,
					}
				}
			}
		}
		if len(storageData[sid]["disk"]) == 0 {
			storageData[sid]["disk"] = []MetricPoint{
				{Timestamp: currentTime, Value: sp.DiskPercent()},
			}
		}
	}

	// Process Nodes - batch-load historical data (1-2 SQL calls instead of N×5).
	nodeMetricTypes := []string{"cpu", "memory", "disk", "netin", "netout"}
	nodeList := readState.Nodes()
	nodeIDs := make([]string, 0, len(nodeList))
	for _, node := range nodeList {
		if node == nil {
			continue
		}
		if nid := node.SourceID(); nid != "" {
			nodeIDs = append(nodeIDs, nid)
		}
	}
	nodeBatchMetrics := monitor.GetNodeMetricsForChartBatch(nodeIDs, nodeMetricTypes, duration)
	for _, node := range nodeList {
		if node == nil {
			continue
		}
		nid := node.SourceID()
		if nid == "" {
			continue
		}
		nodeData[nid] = make(NodeChartData)
		if batchMetrics, ok := nodeBatchMetrics[nid]; ok {
			for _, metricType := range nodeMetricTypes {
				points, found := batchMetrics[metricType]
				if !found {
					continue
				}
				nodeData[nid][metricType] = make([]MetricPoint, len(points))
				for i, point := range points {
					ts := point.Timestamp.UnixMilli()
					if ts < oldestTimestamp {
						oldestTimestamp = ts
					}
					nodeData[nid][metricType][i] = MetricPoint{
						Timestamp: ts,
						Value:     point.Value,
					}
				}
			}
		}
		for _, metricType := range nodeMetricTypes {
			if len(nodeData[nid][metricType]) == 0 {
				var value float64
				hasFallbackValue := true
				switch metricType {
				case "cpu":
					value = node.CPUPercent()
				case "memory":
					value = node.MemoryPercent()
				case "disk":
					value = node.DiskPercent()
				default:
					hasFallbackValue = false
				}
				if hasFallbackValue {
					nodeData[nid][metricType] = []MetricPoint{
						{Timestamp: currentTime, Value: value},
					}
				}
			}
		}
	}

	// Build guest type map with canonical v6 names.
	guestTypes := make(map[string]string)
	for _, vm := range readState.VMs() {
		if vm == nil {
			continue
		}
		if sid := vm.SourceID(); sid != "" {
			guestTypes[sid] = "vm"
		}
	}
	for _, ct := range readState.Containers() {
		if ct == nil {
			continue
		}
		if sid := ct.SourceID(); sid != "" {
			guestTypes[sid] = "system-container"
		}
	}
	for _, dc := range readState.DockerContainers() {
		if dc == nil {
			continue
		}
		if key := strings.TrimSpace(dc.ID()); key != "" {
			guestTypes[key] = "app-container"
		}
	}

	// Process Docker containers - batch-load historical data (1-2 SQL calls instead of N).
	dockerData := make(map[string]VMChartData)
	dcList := readState.DockerContainers()
	dcRequests := make([]monitoring.GuestChartRequest, 0, len(dcList))
	for _, dc := range dcList {
		_, request, ok := appContainerChartRequest(dc)
		if !ok {
			continue
		}
		dcRequests = append(dcRequests, request)
	}
	dcBatchMetrics := monitor.GetGuestMetricsForChartBatch("dockerContainer", dcRequests, duration, infrastructureSummaryMetricOrder...)
	for _, dc := range dcList {
		responseKey, request, ok := appContainerChartRequest(dc)
		if !ok {
			continue
		}
		dockerData[responseKey] = make(VMChartData)
		if batchMetrics, ok := dcBatchMetrics[request.SQLResourceID]; ok {
			for metricType, points := range batchMetrics {
				if !sparklineMetrics[metricType] {
					continue
				}
				dockerData[responseKey][metricType] = make([]MetricPoint, len(points))
				for i, point := range points {
					ts := point.Timestamp.UnixMilli()
					if ts < oldestTimestamp {
						oldestTimestamp = ts
					}
					dockerData[responseKey][metricType][i] = MetricPoint{
						Timestamp: ts,
						Value:     point.Value,
					}
				}
			}
		}
		if len(dockerData[responseKey]["cpu"]) == 0 {
			dockerData[responseKey]["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: dc.CPUPercent()}}
			dockerData[responseKey]["memory"] = []MetricPoint{{Timestamp: currentTime, Value: dc.MemoryPercent()}}
			dockerData[responseKey]["disk"] = []MetricPoint{{Timestamp: currentTime, Value: dc.DiskPercent()}}
		}
	}

	// Process Docker hosts - batch-load historical data (1-2 SQL calls instead of N).
	dockerHostData := make(map[string]VMChartData)
	dhList := readState.DockerHosts()
	dhRequests := make([]monitoring.GuestChartRequest, 0, len(dhList))
	for _, dh := range dhList {
		if dh == nil {
			continue
		}
		if dhID := dh.HostSourceID(); dhID != "" {
			dhRequests = append(dhRequests, monitoring.GuestChartRequest{
				InMemoryKey:   fmt.Sprintf("dockerHost:%s", dhID),
				SQLResourceID: dhID,
			})
		}
	}
	dhBatchMetrics := monitor.GetGuestMetricsForChartBatch("dockerHost", dhRequests, duration, infrastructureSummaryMetricOrder...)
	for _, dh := range dhList {
		if dh == nil {
			continue
		}
		dhID := dh.HostSourceID()
		if dhID == "" {
			continue
		}
		dockerHostData[dhID] = make(VMChartData)
		if batchMetrics, ok := dhBatchMetrics[dhID]; ok {
			for metricType, points := range batchMetrics {
				if !sparklineMetrics[metricType] {
					continue
				}
				dockerHostData[dhID][metricType] = make([]MetricPoint, len(points))
				for i, point := range points {
					ts := point.Timestamp.UnixMilli()
					if ts < oldestTimestamp {
						oldestTimestamp = ts
					}
					dockerHostData[dhID][metricType][i] = MetricPoint{
						Timestamp: ts,
						Value:     point.Value,
					}
				}
			}
		}
		if len(dockerHostData[dhID]["cpu"]) == 0 {
			dockerHostData[dhID]["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: dh.CPUPercent()}}
			dockerHostData[dhID]["memory"] = []MetricPoint{{Timestamp: currentTime, Value: dh.MemoryPercent()}}
			var diskPercent float64
			if disks := dh.Disks(); len(disks) > 0 {
				diskPercent = disks[0].Usage
			}
			dockerHostData[dhID]["disk"] = []MetricPoint{{Timestamp: currentTime, Value: diskPercent}}
		}
	}

	// Process unified agents - batch-load historical data (1-2 SQL calls instead of N).
	agentData := make(map[string]VMChartData)
	hostList := readState.Hosts()
	agentRequests := make([]monitoring.GuestChartRequest, 0, len(hostList))
	for _, h := range hostList {
		_, request, ok := hostAgentChartRequest(h)
		if !ok {
			continue
		}
		agentRequests = append(agentRequests, request)
	}
	agentBatchMetrics := monitor.GetGuestMetricsForChartBatch("agent", agentRequests, duration, infrastructureSummaryMetricOrder...)
	for _, h := range hostList {
		hID, request, ok := hostAgentChartRequest(h)
		if !ok {
			continue
		}
		agentData[hID] = make(VMChartData)
		if batchMetrics, ok := agentBatchMetrics[request.SQLResourceID]; ok {
			for metricType, points := range batchMetrics {
				if !sparklineMetrics[metricType] {
					continue
				}
				agentData[hID][metricType] = make([]MetricPoint, len(points))
				for i, point := range points {
					ts := point.Timestamp.UnixMilli()
					if ts < oldestTimestamp {
						oldestTimestamp = ts
					}
					agentData[hID][metricType][i] = MetricPoint{
						Timestamp: ts,
						Value:     point.Value,
					}
				}
			}
		}
		if len(agentData[hID]["cpu"]) == 0 {
			agentData[hID]["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: h.CPUPercent()}}
			agentData[hID]["memory"] = []MetricPoint{{Timestamp: currentTime, Value: h.MemoryPercent()}}
			agentData[hID]["disk"] = []MetricPoint{{Timestamp: currentTime, Value: h.DiskPercent()}}
		}
	}

	countChartPoints := func(metricsMap map[string]VMChartData) int {
		total := 0
		for _, metricSeries := range metricsMap {
			for _, points := range metricSeries {
				total += len(points)
			}
		}
		return total
	}

	countNodePoints := func(metricsMap map[string]NodeChartData) int {
		total := 0
		for _, metricSeries := range metricsMap {
			for _, points := range metricSeries {
				total += len(points)
			}
		}
		return total
	}

	countStoragePoints := func(metricsMap map[string]StorageChartData) int {
		total := 0
		for _, metricSeries := range metricsMap {
			for _, points := range metricSeries {
				total += len(points)
			}
		}
		return total
	}

	guestPoints := countChartPoints(chartData)
	nodePoints := countNodePoints(nodeData)
	storagePoints := countStoragePoints(storageData)
	dockerContainerPoints := countChartPoints(dockerData)
	dockerHostPoints := countChartPoints(dockerHostData)
	agentPoints := countChartPoints(agentData)

	response := ChartResponse{
		ChartData:      chartData,
		NodeData:       nodeData,
		StorageData:    storageData,
		DockerData:     dockerData,
		DockerHostData: dockerHostData,
		AgentData:      agentData,
		GuestTypes:     guestTypes,
		Timestamp:      currentTime,
		Stats: ChartStats{
			OldestDataTimestamp:   oldestTimestamp,
			Range:                 timeRange,
			RangeSeconds:          int64(duration / time.Second),
			MetricsStoreEnabled:   metricsStoreEnabled,
			PrimarySourceHint:     primarySourceHint,
			InMemoryThresholdSecs: int64(inMemoryChartThreshold / time.Second),
			PointCounts: ChartPointCounts{
				Total:            guestPoints + nodePoints + storagePoints + dockerContainerPoints + dockerHostPoints + agentPoints,
				Guests:           guestPoints,
				Nodes:            nodePoints,
				Storage:          storagePoints,
				DockerContainers: dockerContainerPoints,
				DockerHosts:      dockerHostPoints,
				Agents:           agentPoints,
			},
		},
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		log.Error().Err(err).Msg("Failed to encode chart data response")
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return
	}

	log.Debug().
		Int("guests", len(chartData)).
		Int("nodes", len(nodeData)).
		Int("storage", len(storageData)).
		Int("dockerContainers", len(dockerData)).
		Int("agents", len(agentData)).
		Str("range", timeRange).
		Msg("Chart data response sent")
}

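// parseWorkloadMaxPoints parses the workload charts "maxPoints" query
// parameter, clamping it to [30, 500] and defaulting to 180 when the value
// is empty or not an integer. For example, "1000" yields 500 and "abc"
// yields 180.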
func parseWorkloadMaxPoints(raw string) int {
	const (
		defaultMaxPoints = 180
		minMaxPoints     = 30
		maxMaxPoints     = 500
	)

	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return defaultMaxPoints
	}

	value, err := strconv.Atoi(trimmed)
	if err != nil {
		return defaultMaxPoints
	}
	if value < minMaxPoints {
		return minMaxPoints
	}
	if value > maxMaxPoints {
		return maxMaxPoints
	}
	return value
}

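// hostAgentChartRequest resolves the chart response key and metric lookup
// request for a unified agent host, preferring the agent ID and falling back
// to the host's metrics target. ok is false when neither identifier is set.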
func hostAgentChartRequest(host *unifiedresources.HostView) (string, monitoring.GuestChartRequest, bool) {
	if host == nil {
		return "", monitoring.GuestChartRequest{}, false
	}

	if agentID := strings.TrimSpace(host.AgentID()); agentID != "" {
		return agentID, monitoring.GuestChartRequest{
			InMemoryKey:   fmt.Sprintf("agent:%s", agentID),
			SQLResourceID: agentID,
		}, true
	}

	target := host.MetricsTarget()
	if target == nil {
		return "", monitoring.GuestChartRequest{}, false
	}

	metricID := strings.TrimSpace(target.ResourceID)
	if metricID == "" {
		return "", monitoring.GuestChartRequest{}, false
	}

	return metricID, monitoring.GuestChartRequest{
		InMemoryKey:   fmt.Sprintf("agent:%s", metricID),
		SQLResourceID: metricID,
	}, true
}

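// appContainerChartMetricID returns the metrics resource ID for a Docker
// container view, preferring its metrics target over the raw container ID.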
func appContainerChartMetricID(container *unifiedresources.DockerContainerView) string {
	if container == nil {
		return ""
	}

	if target := container.MetricsTarget(); target != nil {
		if metricID := strings.TrimSpace(target.ResourceID); metricID != "" {
			return metricID
		}
	}

	return strings.TrimSpace(container.ContainerID())
}

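// appContainerChartRequest builds the chart response key and metric lookup
// request for a Docker container view; ok is false when no usable ID exists.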
func appContainerChartRequest(container *unifiedresources.DockerContainerView) (string, monitoring.GuestChartRequest, bool) {
	if container == nil {
		return "", monitoring.GuestChartRequest{}, false
	}

	responseKey := strings.TrimSpace(container.ID())
	if responseKey == "" {
		responseKey = strings.TrimSpace(container.ContainerID())
	}
	metricID := appContainerChartMetricID(container)
	if responseKey == "" || metricID == "" {
		return "", monitoring.GuestChartRequest{}, false
	}

	return responseKey, monitoring.GuestChartRequest{
		InMemoryKey:   fmt.Sprintf("docker:%s", metricID),
		SQLResourceID: metricID,
	}, true
}

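// canonicalGuestResponseKey returns the "instance:node:vmid" form when all
// three parts are present (e.g. canonicalGuestResponseKey("x", "pve1", "node1", 101)
// yields "pve1:node1:101"), otherwise the trimmed resource ID.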
func canonicalGuestResponseKey(resourceID, instance, node string, vmid int) string {
	trimmedInstance := strings.TrimSpace(instance)
	trimmedNode := strings.TrimSpace(node)
	if trimmedInstance != "" && trimmedNode != "" && vmid > 0 {
		return fmt.Sprintf("%s:%s:%d", trimmedInstance, trimmedNode, vmid)
	}
	return strings.TrimSpace(resourceID)
}

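// vmChartMetricID returns the metrics resource ID for a VM view, preferring
// its metrics target over the VM's source ID.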
func vmChartMetricID(vm *unifiedresources.VMView) string {
	if vm == nil {
		return ""
	}

	if target := vm.MetricsTarget(); target != nil {
		if metricID := strings.TrimSpace(target.ResourceID); metricID != "" {
			return metricID
		}
	}

	return strings.TrimSpace(vm.SourceID())
}

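// vmChartRequest builds the chart response key and metric lookup request for
// a VM view; ok is false when either identifier resolves to empty.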
func vmChartRequest(vm *unifiedresources.VMView) (string, monitoring.GuestChartRequest, bool) {
	if vm == nil {
		return "", monitoring.GuestChartRequest{}, false
	}

	responseKey := canonicalGuestResponseKey(vm.ID(), vm.Instance(), vm.Node(), vm.VMID())
	metricID := vmChartMetricID(vm)
	if responseKey == "" || metricID == "" {
		return "", monitoring.GuestChartRequest{}, false
	}

	return responseKey, monitoring.GuestChartRequest{
		InMemoryKey:   metricID,
		SQLResourceID: metricID,
	}, true
}

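// systemContainerChartMetricID returns the metrics resource ID for a system
// container view, preferring its metrics target over the source ID.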
func systemContainerChartMetricID(container *unifiedresources.ContainerView) string {
	if container == nil {
		return ""
	}

	if target := container.MetricsTarget(); target != nil {
		if metricID := strings.TrimSpace(target.ResourceID); metricID != "" {
			return metricID
		}
	}

	return strings.TrimSpace(container.SourceID())
}

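// systemContainerChartRequest builds the chart response key and metric lookup
// request for a system container view; ok is false when no usable ID exists.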
func systemContainerChartRequest(container *unifiedresources.ContainerView) (string, monitoring.GuestChartRequest, bool) {
	if container == nil {
		return "", monitoring.GuestChartRequest{}, false
	}

	responseKey := canonicalGuestResponseKey(container.ID(), container.Instance(), container.Node(), container.VMID())
	metricID := systemContainerChartMetricID(container)
	if responseKey == "" || metricID == "" {
		return "", monitoring.GuestChartRequest{}, false
	}

	return responseKey, monitoring.GuestChartRequest{
		InMemoryKey:   metricID,
		SQLResourceID: metricID,
	}, true
}

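// capMetricPointSeriesByIndex downsamples a series to at most maxPoints by
// evenly spaced indexes, always forcing the final sample into the result so
// the right edge of a sparkline reflects the latest value.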
func capMetricPointSeriesByIndex(points []MetricPoint, maxPoints int) []MetricPoint {
	if len(points) <= maxPoints || maxPoints <= 0 {
		return points
	}
	if maxPoints == 1 {
		return []MetricPoint{points[len(points)-1]}
	}

	result := make([]MetricPoint, 0, maxPoints)
	step := float64(len(points)-1) / float64(maxPoints-1)
	prevIndex := -1

	for i := 0; i < maxPoints; i++ {
		index := int(float64(i)*step + 0.5)
		if index <= prevIndex {
			index = prevIndex + 1
		}
		if index >= len(points) {
			index = len(points) - 1
		}
		result = append(result, points[index])
		prevIndex = index
	}

	if result[len(result)-1].Timestamp != points[len(points)-1].Timestamp {
		result[len(result)-1] = points[len(points)-1]
	}
	return result
}

const (
	infrastructureSummaryMinSeriesPoints = 24
	infrastructureSummaryMaxSeriesPoints = 96
	workloadsSummaryMinSeriesPoints      = 24
	workloadsSummaryMaxSeriesPoints      = 96
)

// capMetricPointSeries keeps mixed-cadence series visually proportional across
// the selected time window. Index-based capping over-selects recent dense
// samples, which bunches the right edge on long ranges.
func capMetricPointSeries(points []MetricPoint, maxPoints int) []MetricPoint {
	if len(points) <= maxPoints || maxPoints <= 0 {
		return points
	}
	if maxPoints == 1 {
		return []MetricPoint{points[len(points)-1]}
	}

	startTimestamp := points[0].Timestamp
	endTimestamp := points[len(points)-1].Timestamp
	if endTimestamp <= startTimestamp {
		return capMetricPointSeriesByIndex(points, maxPoints)
	}

	bucketSpan := float64(endTimestamp-startTimestamp) / float64(maxPoints-1)
	if bucketSpan < 1 {
		return capMetricPointSeriesByIndex(points, maxPoints)
	}

	type timeBucketRepresentative struct {
		point    MetricPoint
		distance float64
		ok       bool
	}

	buckets := make([]timeBucketRepresentative, maxPoints)
	for _, point := range points {
		index := int(math.Round(float64(point.Timestamp-startTimestamp) / bucketSpan))
		if index < 0 {
			index = 0
		}
		if index >= maxPoints {
			index = maxPoints - 1
		}

		targetTimestamp := float64(startTimestamp) + bucketSpan*float64(index)
		distance := math.Abs(float64(point.Timestamp) - targetTimestamp)
		current := buckets[index]
		if !current.ok ||
			distance < current.distance ||
			(distance == current.distance && point.Timestamp > current.point.Timestamp) {
			buckets[index] = timeBucketRepresentative{
				point:    point,
				distance: distance,
				ok:       true,
			}
		}
	}

	result := make([]MetricPoint, 0, maxPoints)
	result = append(result, points[0])
	lastAddedTimestamp := points[0].Timestamp
	for index := 1; index < maxPoints-1; index++ {
		bucket := buckets[index]
		if !bucket.ok {
			continue
		}
		if bucket.point.Timestamp <= lastAddedTimestamp {
			continue
		}
		result = append(result, bucket.point)
		lastAddedTimestamp = bucket.point.Timestamp
	}

	lastPoint := points[len(points)-1]
	if lastPoint.Timestamp <= lastAddedTimestamp {
		result[len(result)-1] = lastPoint
		return result
	}

	result = append(result, lastPoint)
	return result
}

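// targetBoundedSummarySeriesPoints budgets roughly one point per minute of
// the window, clamped to [minPoints, maxPoints] and never below 2 so a line
// can still be drawn.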
func targetBoundedSummarySeriesPoints(duration time.Duration, minPoints, maxPoints int) int {
	if duration <= 0 {
		return minPoints
	}

	target := int(duration / time.Minute)
	if target < minPoints {
		target = minPoints
	}
	if target > maxPoints {
		target = maxPoints
	}
	if target < 2 {
		target = 2
	}
	return target
}

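// infrastructureSummaryBucket accumulates the samples that land in one
// equal-time bucket during summary normalization: running count, sum, and
// max, plus the first/last timestamps and the most recent value.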
type infrastructureSummaryBucket struct {
	count          int
	sum            float64
	max            float64
	firstTimestamp int64
	lastTimestamp  int64
	lastValue      float64
}

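// targetInfrastructureSummarySeriesPoints bounds infrastructure summary
// series to the [24, 96] point range for the given window.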
func targetInfrastructureSummarySeriesPoints(duration time.Duration) int {
	return targetBoundedSummarySeriesPoints(
		duration,
		infrastructureSummaryMinSeriesPoints,
		infrastructureSummaryMaxSeriesPoints,
	)
}

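// targetWorkloadsSummarySeriesPoints bounds workloads summary series to the
// [24, 96] point range for the given window.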
func targetWorkloadsSummarySeriesPoints(duration time.Duration) int {
	return targetBoundedSummarySeriesPoints(
		duration,
		workloadsSummaryMinSeriesPoints,
		workloadsSummaryMaxSeriesPoints,
	)
}

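// aggregateInfrastructureSummaryBucketValue collapses a bucket to one value:
// the latest sample for the final bucket, the mean for memory/disk, and the
// max for other metrics so short spikes stay visible on long ranges.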
func aggregateInfrastructureSummaryBucketValue(
	metricType string,
	bucket infrastructureSummaryBucket,
	isLastBucket bool,
) float64 {
	if bucket.count == 0 {
		return 0
	}
	if isLastBucket {
		return bucket.lastValue
	}

	switch metricType {
	case "memory", "disk":
		return bucket.sum / float64(bucket.count)
	default:
		return bucket.max
	}
}

// normalizeInfrastructureSummaryMetricPointSeries folds mixed-cadence history
// into equal-time buckets for the infrastructure summary endpoint so long-range
// sparklines do not bunch recent higher-resolution samples at the right edge.
func normalizeInfrastructureSummaryMetricPointSeries(
	points []MetricPoint,
	metricType string,
	duration time.Duration,
	windowEndMillis int64,
) []MetricPoint {
	targetPoints := targetInfrastructureSummarySeriesPoints(duration)
	if len(points) <= targetPoints || targetPoints < 2 || duration <= 0 {
		return points
	}

	durationMillis := int64(duration / time.Millisecond)
	if durationMillis <= 0 {
		return points
	}

	windowStartMillis := windowEndMillis - durationMillis
	bucketCount := targetPoints
	buckets := make([]infrastructureSummaryBucket, bucketCount)
	firstNonEmpty := -1
	lastNonEmpty := -1

	for _, point := range points {
		if point.Timestamp < windowStartMillis || point.Timestamp > windowEndMillis {
			continue
		}
		bucketIndex := int(((point.Timestamp - windowStartMillis) * int64(bucketCount)) / durationMillis)
		if bucketIndex < 0 {
			bucketIndex = 0
		}
		if bucketIndex >= bucketCount {
			bucketIndex = bucketCount - 1
		}

		bucket := &buckets[bucketIndex]
		if bucket.count == 0 {
			bucket.max = point.Value
			bucket.firstTimestamp = point.Timestamp
			if firstNonEmpty == -1 {
				firstNonEmpty = bucketIndex
			}
		} else if point.Value > bucket.max {
			bucket.max = point.Value
		}
		bucket.count++
		bucket.sum += point.Value
		bucket.lastTimestamp = point.Timestamp
		bucket.lastValue = point.Value
		lastNonEmpty = bucketIndex
	}

	if firstNonEmpty == -1 || lastNonEmpty == -1 {
		return points
	}

	result := make([]MetricPoint, 0, targetPoints)
	for bucketIndex := 0; bucketIndex < bucketCount; bucketIndex++ {
		bucket := buckets[bucketIndex]
		if bucket.count == 0 {
			continue
		}

		bucketStartMillis := windowStartMillis + (int64(bucketIndex)*durationMillis)/int64(bucketCount)
		bucketEndMillis := windowStartMillis + (int64(bucketIndex+1)*durationMillis)/int64(bucketCount)
		timestamp := bucketStartMillis + (bucketEndMillis-bucketStartMillis)/2
		switch bucketIndex {
		case firstNonEmpty:
			timestamp = bucket.firstTimestamp
		case lastNonEmpty:
			timestamp = bucket.lastTimestamp
		}

		result = append(result, MetricPoint{
			Timestamp: timestamp,
			Value: aggregateInfrastructureSummaryBucketValue(
				metricType,
				bucket,
				bucketIndex == lastNonEmpty,
			),
		})
	}

	if len(result) == 0 {
		return points
	}
	return result
}

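// normalizeInfrastructureSummaryChartSeries applies the per-metric series
// normalization above to every series in the map, in place.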
func normalizeInfrastructureSummaryChartSeries(
	metrics map[string][]MetricPoint,
	duration time.Duration,
	windowEndMillis int64,
) {
	for metricType, points := range metrics {
		metrics[metricType] = normalizeInfrastructureSummaryMetricPointSeries(
			points,
			metricType,
			duration,
			windowEndMillis,
		)
	}
}

// sparklineMetrics lists the metric types consumed by summary sparklines
// and density maps. Metrics not in this set are omitted to keep payloads small.
var sparklineMetrics = map[string]bool{
	"cpu":       true,
	"memory":    true,
	"disk":      true,
	"diskread":  true,
	"diskwrite": true,
	"netin":     true,
	"netout":    true,
}

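// infrastructureSummaryMetricOrder is the stable metric ordering used when
// batch-loading chart series for infrastructure endpoints.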
var infrastructureSummaryMetricOrder = []string{
	"cpu",
	"memory",
	"disk",
	"diskread",
	"diskwrite",
	"netin",
	"netout",
}

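// workloadSummaryMetricOrder is the stable metric ordering used when
// batch-loading chart series for workload (e.g. Kubernetes pod) endpoints.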
var workloadSummaryMetricOrder = []string{
	"cpu",
	"memory",
	"disk",
	"netin",
	"netout",
}

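// parseInfrastructureSummaryRequestedMetrics parses the optional "metrics"
// query parameter (comma-separated, case-insensitive) into an ordered list
// plus lookup set. Without a filter it returns every sparkline metric;
// unknown names or an effectively empty filter yield an error.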
func parseInfrastructureSummaryRequestedMetrics(
	query url.Values,
) ([]string, map[string]bool, error) {
	rawValues, ok := query["metrics"]
	if !ok || len(rawValues) == 0 {
		requested := make(map[string]bool, len(infrastructureSummaryMetricOrder))
		for _, metricType := range infrastructureSummaryMetricOrder {
			requested[metricType] = true
		}
		return append([]string(nil), infrastructureSummaryMetricOrder...), requested, nil
	}

	requestedList := make([]string, 0, len(infrastructureSummaryMetricOrder))
	requestedSet := make(map[string]bool, len(infrastructureSummaryMetricOrder))
	invalid := make([]string, 0)

	for _, rawValue := range rawValues {
		for _, part := range strings.Split(rawValue, ",") {
			metricType := strings.TrimSpace(strings.ToLower(part))
			if metricType == "" {
				continue
			}
			if !sparklineMetrics[metricType] {
				invalid = append(invalid, metricType)
				continue
			}
			if requestedSet[metricType] {
				continue
			}
			requestedSet[metricType] = true
			requestedList = append(requestedList, metricType)
		}
	}

	if len(invalid) > 0 {
		return nil, nil, fmt.Errorf("invalid infrastructure metrics filter: %s", strings.Join(invalid, ", "))
	}
	if len(requestedList) == 0 {
		return nil, nil, fmt.Errorf("infrastructure metrics filter must include at least one valid metric")
	}
	return requestedList, requestedSet, nil
}

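// convertMetricsForChart converts monitoring points into the frontend chart
// format, lowering *oldestTimestamp as older samples are seen and capping
// each series to maxPoints with time-proportional downsampling.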
func convertMetricsForChart(
	metrics map[string][]monitoring.MetricPoint,
	oldestTimestamp *int64,
	maxPoints int,
) VMChartData {
	converted := make(VMChartData, len(metrics))
	for metricType, metricPoints := range metrics {
		if !sparklineMetrics[metricType] {
			continue
		}
		points := make([]MetricPoint, len(metricPoints))
		for i, point := range metricPoints {
			ts := point.Timestamp.UnixMilli()
			if ts < *oldestTimestamp {
				*oldestTimestamp = ts
			}
			points[i] = MetricPoint{
				Timestamp: ts,
				Value:     point.Value,
			}
		}
		converted[metricType] = capMetricPointSeries(points, maxPoints)
	}
	return converted
}

const (
	mockWorkloadMinSeriesPoints = 24
	mockWorkloadMaxSeriesPoints = 180
)

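// clampChartValue bounds value to [min, max], mapping NaN and ±Inf to min so
// generated series never emit unrenderable samples.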
func clampChartValue(value, min, max float64) float64 {
	if math.IsNaN(value) || math.IsInf(value, 0) {
		return min
	}
	if value < min {
		return min
	}
	if value > max {
		return max
	}
	return value
}

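// hashChartSeed derives a stable FNV-1a hash from the given parts, separating
// them with NUL bytes so adjacent parts cannot collide by concatenation.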
func hashChartSeed(parts ...string) uint64 {
	h := fnv.New64a()
	for _, p := range parts {
		_, _ = h.Write([]byte(p))
		_, _ = h.Write([]byte{0})
	}
	return h.Sum64()
}

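// targetMockSeriesPoints budgets roughly one mock point per two minutes of
// the window, clamped to [24, 180] and to the caller's maxPoints budget.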
func targetMockSeriesPoints(duration time.Duration, maxPoints int) int {
	target := int(duration / (2 * time.Minute))
	if target < mockWorkloadMinSeriesPoints {
		target = mockWorkloadMinSeriesPoints
	}
	if maxPoints > 0 && target > maxPoints {
		target = maxPoints
	}
	if target > mockWorkloadMaxSeriesPoints {
		target = mockWorkloadMaxSeriesPoints
	}
	if target < 2 {
		target = 2
	}
	return target
}

// mockMetricStyle returns the series style for a given metric type.
func mockMetricStyle(metricType string) monitoring.SeriesStyle {
	switch metricType {
	case "cpu", "diskread", "diskwrite", "netin", "netout":
		return monitoring.StyleSpiky
	case "memory":
		return monitoring.StylePlateau
	default:
		return monitoring.StyleFlat
	}
}

// generateStyledMockSeries produces a MetricPoint slice using the style-based
// generator from the monitoring package.
func generateStyledMockSeries(
	nowMillis int64,
	duration time.Duration,
	numPoints int,
	current float64,
	resourceType string,
	resourceID string,
	metricType string,
) []MetricPoint {
	style := mockMetricStyle(metricType)

	durationMillis := int64(duration / time.Millisecond)
	if durationMillis <= 0 {
		durationMillis = int64(time.Minute / time.Millisecond)
	}
	step := durationMillis / int64(numPoints-1)
	if step <= 0 {
		step = 1
	}
	startMillis := nowMillis - durationMillis
	timestamps := make([]time.Time, numPoints)
	for i := 0; i < numPoints; i++ {
		timestamps[i] = time.UnixMilli(startMillis + int64(i)*step)
	}
	values := monitoring.GenerateSeededResourceMetricSeriesForTimestamps(
		current,
		timestamps,
		resourceType,
		resourceID,
		metricType,
		style,
	)
	points := make([]MetricPoint, numPoints)
	for i := 0; i < numPoints; i++ {
		points[i] = MetricPoint{
			Timestamp: startMillis + int64(i)*step,
			Value:     values[i],
		}
	}
	return points
}

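// updateOldestTimestampFromSeries lowers *oldestTimestamp to the earliest
// timestamp present in the given chart series, if any sample is older.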
func updateOldestTimestampFromSeries(metrics VMChartData, oldestTimestamp *int64) {
	if oldestTimestamp == nil {
		return
	}
	for _, points := range metrics {
		for _, point := range points {
			if point.Timestamp < *oldestTimestamp {
				*oldestTimestamp = point.Timestamp
			}
		}
	}
}

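// buildSyntheticMetricHistorySeries generates a seeded synthetic history
// series for disk/disk-I/O metrics and (when the reading is positive) SMART
// temperature; all other metric types return nil.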
func buildSyntheticMetricHistorySeries(
	now time.Time,
	duration time.Duration,
	maxPoints int,
	resourceType string,
	resourceID string,
	metricType string,
	current float64,
) []monitoring.MetricPoint {
	switch metricType {
	case "disk", "diskread", "diskwrite":
	case "smart_temp":
		if current <= 0 {
			return nil
		}
	default:
		return nil
	}

	numPoints := targetMockSeriesPoints(duration, maxPoints)
	series := generateStyledMockSeries(
		now.UnixMilli(), duration, numPoints,
		current, resourceType, resourceID, metricType,
	)

	converted := make([]monitoring.MetricPoint, len(series))
	for i, point := range series {
		converted[i] = monitoring.MetricPoint{
			Timestamp: time.UnixMilli(point.Timestamp),
			Value:     point.Value,
		}
	}

	return converted
}

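// buildMockWorkloadMetricHistorySeries generates a seeded synthetic history
// series for workload metrics (cpu, memory, disk, and the disk/network
// rates); unsupported metric types return nil.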
func buildMockWorkloadMetricHistorySeries(
	now time.Time,
	duration time.Duration,
	maxPoints int,
	resourceType string,
	resourceID string,
	metricType string,
	current float64,
) []monitoring.MetricPoint {
	switch metricType {
	case "cpu", "memory", "disk":
	case "diskread", "diskwrite", "netin", "netout":
	default:
		return nil
	}

	numPoints := targetMockSeriesPoints(duration, maxPoints)
	series := generateStyledMockSeries(
		now.UnixMilli(), duration, numPoints,
		current, resourceType, resourceID, metricType,
	)

	converted := make([]monitoring.MetricPoint, len(series))
	for i, point := range series {
		converted[i] = monitoring.MetricPoint{
			Timestamp: time.UnixMilli(point.Timestamp),
			Value:     point.Value,
		}
	}

	return converted
}

// handleWorkloadCharts serves workload-only chart data used by workloads
// sparklines. It intentionally excludes infrastructure/storage chart payloads
// to keep requests small and stable for large fleets.
func (r *Router) handleWorkloadCharts(w http.ResponseWriter, req *http.Request) {
	log.Debug().Str("method", req.Method).Str("url", req.URL.String()).Msg("Workload charts endpoint hit")
	const inMemoryChartThreshold = 2 * time.Hour

	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	query := req.URL.Query()
	timeRange := query.Get("range")
	if timeRange == "" {
		timeRange = "1h"
	}
	selectedNodeID := strings.TrimSpace(query.Get("node"))
	maxPoints := parseWorkloadMaxPoints(query.Get("maxPoints"))
	duration := parseChartsRangeDuration(timeRange)

	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		http.Error(w, "Tenant monitor is not available", http.StatusInternalServerError)
		return
	}
	nodes := monitor.NodesSnapshot()
	readState := monitor.GetUnifiedReadStateOrSnapshot()
	if readState == nil {
		http.Error(w, "State unavailable", http.StatusInternalServerError)
		return
	}
	metricsStoreEnabled := monitor.GetMetricsStore() != nil
	primarySourceHint := "memory"
	if metricsStoreEnabled && duration > inMemoryChartThreshold {
		primarySourceHint = "store_or_memory_fallback"
	}

	currentTime := time.Now().UnixMilli()
	oldestTimestamp := currentTime

	var selectedNode *models.Node
	if selectedNodeID != "" {
		for idx := range nodes {
			if nodes[idx].ID == selectedNodeID {
				selectedNode = &nodes[idx]
				break
			}
		}
		if selectedNode == nil {
			log.Debug().
				Str("selectedNodeID", selectedNodeID).
				Msg("Workload charts node filter not found in current state; falling back to global scope")
		}
	}

	matchesSelectedNode := func(instance, nodeName string) bool {
		if selectedNodeID == "" {
			return true
		}
		if selectedNode == nil {
			return true
		}
		return strings.EqualFold(strings.TrimSpace(instance), strings.TrimSpace(selectedNode.Instance)) &&
			strings.EqualFold(strings.TrimSpace(nodeName), strings.TrimSpace(selectedNode.Name))
	}

	matchesSelectedDockerHostView := func(host *unifiedresources.DockerHostView) bool {
		if selectedNodeID == "" {
			return true
		}
		if selectedNode == nil {
			return true
		}
		if host == nil {
			return false
		}
		nodeName := strings.TrimSpace(selectedNode.Name)
		if nodeName == "" {
			return false
		}
		return strings.EqualFold(strings.TrimSpace(host.Hostname()), nodeName) ||
			strings.EqualFold(strings.TrimSpace(host.Name()), nodeName)
	}

	matchesSelectedAgentHostView := func(host *unifiedresources.HostView) bool {
		if selectedNodeID == "" {
			return true
		}
		if selectedNode == nil {
			return true
		}
		if host == nil {
			return false
		}
		nodeName := strings.TrimSpace(selectedNode.Name)
		if nodeName == "" {
			return false
		}
		return strings.EqualFold(strings.TrimSpace(host.Hostname()), nodeName) ||
			strings.EqualFold(strings.TrimSpace(host.Name()), nodeName) ||
			strings.EqualFold(strings.TrimSpace(host.AgentID()), nodeName) ||
			strings.EqualFold(strings.TrimSpace(host.ID()), nodeName)
	}

	matchesSelectedKubernetesPodView := func(pod *unifiedresources.PodView) bool {
		if selectedNodeID == "" {
			return true
		}
		if selectedNode == nil {
			return true
		}
		if pod == nil {
			return false
		}
		nodeName := strings.TrimSpace(selectedNode.Name)
		if nodeName == "" {
			return false
		}
		return strings.EqualFold(strings.TrimSpace(pod.NodeName()), nodeName)
	}

	chartData := make(map[string]VMChartData)
	dockerData := make(map[string]VMChartData)

	guestTypes := make(map[string]string)

	vmList := make([]*unifiedresources.VMView, 0)
	vmResponseKeys := make([]string, 0)
	vmRequests := make([]monitoring.GuestChartRequest, 0)
	for _, vm := range readState.VMs() {
		if vm == nil {
			continue
		}
		if !matchesSelectedNode(vm.Instance(), vm.Node()) {
			continue
		}

		responseKey, request, ok := vmChartRequest(vm)
		if !ok {
			continue
		}

		vmList = append(vmList, vm)
		vmResponseKeys = append(vmResponseKeys, responseKey)
		vmRequests = append(vmRequests, request)
	}
	containerList := make([]*unifiedresources.ContainerView, 0)
	containerResponseKeys := make([]string, 0)
	containerRequests := make([]monitoring.GuestChartRequest, 0)
	for _, ct := range readState.Containers() {
		if ct == nil {
			continue
		}
		if !matchesSelectedNode(ct.Instance(), ct.Node()) {
			continue
		}

		responseKey, request, ok := systemContainerChartRequest(ct)
		if !ok {
			continue
		}

		containerList = append(containerList, ct)
		containerResponseKeys = append(containerResponseKeys, responseKey)
		containerRequests = append(containerRequests, request)
	}
	podList := make([]*unifiedresources.PodView, 0)
	podRequests := make([]monitoring.GuestChartRequest, 0)
	for _, pod := range readState.Pods() {
		if pod == nil {
			continue
		}
		if !matchesSelectedKubernetesPodView(pod) {
			continue
		}

		metricKey := kubernetesPodMetricIDFromView(pod)
		if metricKey == "" {
			continue
		}

		podList = append(podList, pod)
		podRequests = append(podRequests, monitoring.GuestChartRequest{InMemoryKey: metricKey, SQLResourceID: metricKey})
	}
	dockerHostsByID := make(map[string]*unifiedresources.DockerHostView, len(readState.DockerHosts()))
	for _, host := range readState.DockerHosts() {
		if host == nil {
			continue
		}
		dockerHostsByID[host.ID()] = host
	}
	agentHostsByID := make(map[string]*unifiedresources.HostView, len(readState.Hosts()))
	for _, host := range readState.Hosts() {
		if host == nil {
			continue
		}
		agentHostsByID[host.ID()] = host
	}

	dockerContainerList := make([]*unifiedresources.DockerContainerView, 0)
	dockerContainerRequests := make([]monitoring.GuestChartRequest, 0)
	dockerContainerKeys := make([]string, 0)
	for _, container := range readState.DockerContainers() {
		if container == nil {
			continue
		}

		if selectedNodeID != "" && selectedNode != nil {
			host := dockerHostsByID[container.ParentID()]
			if host != nil {
				if !matchesSelectedDockerHostView(host) {
					continue
				}
			} else {
				agentHost := agentHostsByID[container.ParentID()]
				if agentHost == nil || !matchesSelectedAgentHostView(agentHost) {
					continue
				}
			}
		}

		responseKey, request, ok := appContainerChartRequest(container)
		if !ok {
			continue
		}
		dockerContainerList = append(dockerContainerList, container)
		dockerContainerKeys = append(dockerContainerKeys, responseKey)
		dockerContainerRequests = append(dockerContainerRequests, request)
	}
	var (
		vmBatchMetrics              map[string]map[string][]monitoring.MetricPoint
		containerBatchMetrics       map[string]map[string][]monitoring.MetricPoint
		podBatchMetrics             map[string]map[string][]monitoring.MetricPoint
		dockerContainerBatchMetrics map[string]map[string][]monitoring.MetricPoint
	)
	var workloadChartsBatchWG sync.WaitGroup
	workloadChartsBatchWG.Add(4)
	go func() {
		defer workloadChartsBatchWG.Done()
		vmBatchMetrics = monitor.GetGuestMetricsForChartBatch("vm", vmRequests, duration, infrastructureSummaryMetricOrder...)
	}()
	go func() {
		defer workloadChartsBatchWG.Done()
		containerBatchMetrics = monitor.GetGuestMetricsForChartBatch("container", containerRequests, duration, infrastructureSummaryMetricOrder...)
	}()
	go func() {
		defer workloadChartsBatchWG.Done()
		podBatchMetrics = monitor.GetGuestMetricsForChartBatch("k8s", podRequests, duration, workloadSummaryMetricOrder...)
	}()
	go func() {
		defer workloadChartsBatchWG.Done()
		dockerContainerBatchMetrics = monitor.GetGuestMetricsForChartBatch("dockerContainer", dockerContainerRequests, duration, infrastructureSummaryMetricOrder...)
	}()
	workloadChartsBatchWG.Wait()

	for idx, vm := range vmList {
		responseKey := vmResponseKeys[idx]
		metricID := vmRequests[idx].SQLResourceID
		series := convertMetricsForChart(vmBatchMetrics[metricID], &oldestTimestamp, maxPoints)
		guestTypes[responseKey] = "vm"

		if len(series["cpu"]) == 0 {
			series["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: vm.CPUPercent()}}
			series["memory"] = []MetricPoint{{Timestamp: currentTime, Value: vm.MemoryPercent()}}
			series["disk"] = []MetricPoint{{Timestamp: currentTime, Value: vm.DiskPercent()}}
			series["netin"] = []MetricPoint{{Timestamp: currentTime, Value: vm.NetIn()}}
			series["netout"] = []MetricPoint{{Timestamp: currentTime, Value: vm.NetOut()}}
		}
		chartData[responseKey] = series
	}

	for idx, ct := range containerList {
		responseKey := containerResponseKeys[idx]
		metricID := containerRequests[idx].SQLResourceID
		series := convertMetricsForChart(containerBatchMetrics[metricID], &oldestTimestamp, maxPoints)
		guestTypes[responseKey] = "system-container"

		if len(series["cpu"]) == 0 {
			series["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: ct.CPUPercent()}}
			series["memory"] = []MetricPoint{{Timestamp: currentTime, Value: ct.MemoryPercent()}}
			series["disk"] = []MetricPoint{{Timestamp: currentTime, Value: ct.DiskPercent()}}
			series["netin"] = []MetricPoint{{Timestamp: currentTime, Value: ct.NetIn()}}
			series["netout"] = []MetricPoint{{Timestamp: currentTime, Value: ct.NetOut()}}
		}
		chartData[responseKey] = series
	}

	for _, pod := range podList {
		metricKey := kubernetesPodMetricIDFromView(pod)
		series := convertMetricsForChart(podBatchMetrics[metricKey], &oldestTimestamp, maxPoints)
		guestTypes[metricKey] = "k8s"

		if len(series["cpu"]) == 0 {
			series["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: pod.CPUPercent()}}
			series["memory"] = []MetricPoint{{Timestamp: currentTime, Value: pod.MemoryPercent()}}
			series["disk"] = []MetricPoint{{Timestamp: currentTime, Value: pod.DiskPercent()}}
			series["netin"] = []MetricPoint{{Timestamp: currentTime, Value: pod.NetInRate()}}
			series["netout"] = []MetricPoint{{Timestamp: currentTime, Value: pod.NetOutRate()}}
		}
		chartData[metricKey] = series
	}

	for idx, container := range dockerContainerList {
		responseKey := dockerContainerKeys[idx]
		metricID := dockerContainerRequests[idx].SQLResourceID
		series := convertMetricsForChart(dockerContainerBatchMetrics[metricID], &oldestTimestamp, maxPoints)
		guestTypes[responseKey] = "app-container"

		if len(series["cpu"]) == 0 {
			series["cpu"] = []MetricPoint{{Timestamp: currentTime, Value: container.CPUPercent()}}
			series["memory"] = []MetricPoint{{Timestamp: currentTime, Value: container.MemoryPercent()}}
			series["disk"] = []MetricPoint{{Timestamp: currentTime, Value: container.DiskPercent()}}
			series["netin"] = []MetricPoint{{Timestamp: currentTime, Value: container.NetInRate()}}
			series["netout"] = []MetricPoint{{Timestamp: currentTime, Value: container.NetOutRate()}}
		}
		dockerData[responseKey] = series
	}

	countChartPoints := func(metricsMap map[string]VMChartData) int {
		total := 0
		for _, metricSeries := range metricsMap {
			for _, points := range metricSeries {
				total += len(points)
			}
		}
		return total
	}

	guestPoints := countChartPoints(chartData)
	dockerContainerPoints := countChartPoints(dockerData)

	response := EmptyWorkloadChartsResponse()
	response.ChartData = chartData
	response.DockerData = dockerData
	response.GuestTypes = guestTypes
	response.Timestamp = currentTime
	response.Stats = ChartStats{
		OldestDataTimestamp:   oldestTimestamp,
		Range:                 timeRange,
		RangeSeconds:          int64(duration / time.Second),
		MetricsStoreEnabled:   metricsStoreEnabled,
		PrimarySourceHint:     primarySourceHint,
		InMemoryThresholdSecs: int64(inMemoryChartThreshold / time.Second),
		PointCounts: ChartPointCounts{
			Total:            guestPoints + dockerContainerPoints,
			Guests:           guestPoints,
			DockerContainers: dockerContainerPoints,
		},
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response.NormalizeCollections()); err != nil {
		log.Error().Err(err).Msg("Failed to encode workload chart data response")
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return
	}
}

// parseChartsRangeDuration converts the UI chart range query (e.g. "5m", "1h")
// into a duration. This is shared by /api/charts and /api/charts/infrastructure
// to prevent drift. Unrecognized range strings fall back to one hour.
func parseChartsRangeDuration(rangeStr string) time.Duration {
	switch rangeStr {
	case "5m":
		return 5 * time.Minute
	case "15m":
		return 15 * time.Minute
	case "30m":
		return 30 * time.Minute
	case "1h":
		return time.Hour
	case "4h":
		return 4 * time.Hour
	case "8h":
		return 8 * time.Hour
	case "12h":
		return 12 * time.Hour
	case "24h":
		return 24 * time.Hour
	case "7d":
		return 7 * 24 * time.Hour
	case "30d":
		return 30 * 24 * time.Hour
	default:
		return time.Hour
	}
}

// handleInfrastructureCharts serves infrastructure-only chart data.
// This is intentionally narrower than /api/charts to reduce payload size and server-side compute
// for the Infrastructure page summary cards.
func (r *Router) handleInfrastructureCharts(w http.ResponseWriter, req *http.Request) {
	log.Debug().Str("method", req.Method).Str("url", req.URL.String()).Msg("Infrastructure charts endpoint hit")
	const inMemoryChartThreshold = 2 * time.Hour

	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Get time range from query parameters
	query := req.URL.Query()
	timeRange := query.Get("range")
	if timeRange == "" {
		timeRange = "1h"
	}
	requestedMetricNames, requestedMetrics, err := parseInfrastructureSummaryRequestedMetrics(query)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	// Convert time range to duration.
	duration := parseChartsRangeDuration(timeRange)

	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		http.Error(w, "Tenant monitor is not available", http.StatusInternalServerError)
		return
	}
	readState := monitor.GetUnifiedReadStateOrSnapshot()
	if readState == nil {
		http.Error(w, "State unavailable", http.StatusInternalServerError)
		return
	}
	metricsStoreEnabled := monitor.GetMetricsStore() != nil
	primarySourceHint := "memory"
	if metricsStoreEnabled && duration > inMemoryChartThreshold {
		primarySourceHint = "store_or_memory_fallback"
	}

	currentTime := time.Now().UnixMilli()
	oldestTimestamp := currentTime

	// Process Nodes - batch-load historical data (1-2 SQL calls instead of N×5).
	nodeMetricTypes := make([]string, 0, 5)
	for _, metricType := range []string{"cpu", "memory", "disk", "netin", "netout"} {
		if requestedMetrics[metricType] {
			nodeMetricTypes = append(nodeMetricTypes, metricType)
		}
	}
	nodeData := make(map[string]NodeChartData)
	nodeList := readState.Nodes()
	nodeIDs := make([]string, 0, len(nodeList))
	for _, node := range nodeList {
		if node == nil {
			continue
		}
		if nid := node.SourceID(); nid != "" {
			nodeIDs = append(nodeIDs, nid)
		}
	}
	nodeBatchMetrics := map[string]map[string][]monitoring.MetricPoint{}
	if len(nodeMetricTypes) > 0 {
		nodeBatchMetrics = monitor.GetNodeMetricsForChartBatch(nodeIDs, nodeMetricTypes, duration)
	}
	for _, node := range nodeList {
		if node == nil {
			continue
		}
		nid := node.SourceID()
		if nid == "" {
			continue
		}
		nodeData[nid] = make(NodeChartData)
		if batchMetrics, ok := nodeBatchMetrics[nid]; ok {
			for _, metricType := range nodeMetricTypes {
				points, found := batchMetrics[metricType]
				if !found {
					continue
				}
				nodeData[nid][metricType] = make([]MetricPoint, len(points))
				for i, point := range points {
					ts := point.Timestamp.UnixMilli()
					if ts < oldestTimestamp {
						oldestTimestamp = ts
					}
					nodeData[nid][metricType][i] = MetricPoint{
						Timestamp: ts,
						Value:     point.Value,
					}
				}
			}
		}
		for _, metricType := range nodeMetricTypes {
			if len(nodeData[nid][metricType]) > 0 {
				continue
			}
			var value float64
			hasFallbackValue := true
			switch metricType {
			case "cpu":
				value = node.CPUPercent()
			case "memory":
				value = node.MemoryPercent()
			case "disk":
				value = node.DiskPercent()
			default:
				hasFallbackValue = false
}
|
||
if hasFallbackValue {
|
||
nodeData[nid][metricType] = []MetricPoint{
|
||
{Timestamp: currentTime, Value: value},
|
||
}
|
||
}
|
||
}
|
||
normalizeInfrastructureSummaryChartSeries(nodeData[nid], duration, currentTime)
|
||
}
|
||
|
||
// Process Docker hosts - batch-load historical data (1-2 SQL calls instead of N).
|
||
dockerHostData := make(map[string]VMChartData)
|
||
dhList := readState.DockerHosts()
|
||
dhRequests := make([]monitoring.GuestChartRequest, 0, len(dhList))
|
||
for _, dh := range dhList {
|
||
if dh == nil {
|
||
continue
|
||
}
|
||
if dhID := dh.HostSourceID(); dhID != "" {
|
||
dhRequests = append(dhRequests, monitoring.GuestChartRequest{
|
||
InMemoryKey: fmt.Sprintf("dockerHost:%s", dhID),
|
||
SQLResourceID: dhID,
|
||
})
|
||
}
|
||
}
|
||
dhBatchMetrics := monitor.GetGuestMetricsForChartBatch("dockerHost", dhRequests, duration, requestedMetricNames...)
|
||
for _, dh := range dhList {
|
||
if dh == nil {
|
||
continue
|
||
}
|
||
dhID := dh.HostSourceID()
|
||
if dhID == "" {
|
||
continue
|
||
}
|
||
dockerHostData[dhID] = make(VMChartData)
|
||
if batchMetrics, ok := dhBatchMetrics[dhID]; ok {
|
||
for metricType, points := range batchMetrics {
|
||
if !requestedMetrics[metricType] {
|
||
continue
|
||
}
|
||
dockerHostData[dhID][metricType] = make([]MetricPoint, len(points))
|
||
for i, point := range points {
|
||
ts := point.Timestamp.UnixMilli()
|
||
if ts < oldestTimestamp {
|
||
oldestTimestamp = ts
|
||
}
|
||
dockerHostData[dhID][metricType][i] = MetricPoint{
|
||
Timestamp: ts,
|
||
Value: point.Value,
|
||
}
|
||
}
|
||
}
|
||
}
|
||
for _, metricType := range requestedMetricNames {
|
||
if len(dockerHostData[dhID][metricType]) > 0 {
|
||
continue
|
||
}
|
||
var value float64
|
||
hasFallbackValue := true
|
||
switch metricType {
|
||
case "cpu":
|
||
value = dh.CPUPercent()
|
||
case "memory":
|
||
value = dh.MemoryPercent()
|
||
case "disk":
|
||
if disks := dh.Disks(); len(disks) > 0 {
|
||
value = disks[0].Usage
|
||
}
|
||
default:
|
||
hasFallbackValue = false
|
||
}
|
||
if hasFallbackValue {
|
||
dockerHostData[dhID][metricType] = []MetricPoint{{Timestamp: currentTime, Value: value}}
|
||
}
|
||
}
|
||
normalizeInfrastructureSummaryChartSeries(dockerHostData[dhID], duration, currentTime)
|
||
}
|
||
|
||
// Process unified agents - batch-load historical data (1-2 SQL calls instead of N).
|
||
agentData := make(map[string]VMChartData)
|
||
hostList := readState.Hosts()
|
||
agentRequests := make([]monitoring.GuestChartRequest, 0, len(hostList))
|
||
for _, h := range hostList {
|
||
_, request, ok := hostAgentChartRequest(h)
|
||
if !ok {
|
||
continue
|
||
}
|
||
agentRequests = append(agentRequests, request)
|
||
}
|
||
agentBatchMetrics := monitor.GetGuestMetricsForChartBatch("agent", agentRequests, duration, requestedMetricNames...)
|
||
for _, h := range hostList {
|
||
hID, request, ok := hostAgentChartRequest(h)
|
||
if !ok {
|
||
continue
|
||
}
|
||
agentData[hID] = make(VMChartData)
|
||
if batchMetrics, ok := agentBatchMetrics[request.SQLResourceID]; ok {
|
||
for metricType, points := range batchMetrics {
|
||
if !requestedMetrics[metricType] {
|
||
continue
|
||
}
|
||
agentData[hID][metricType] = make([]MetricPoint, len(points))
|
||
for i, point := range points {
|
||
ts := point.Timestamp.UnixMilli()
|
||
if ts < oldestTimestamp {
|
||
oldestTimestamp = ts
|
||
}
|
||
agentData[hID][metricType][i] = MetricPoint{
|
||
Timestamp: ts,
|
||
Value: point.Value,
|
||
}
|
||
}
|
||
}
|
||
}
|
||
for _, metricType := range requestedMetricNames {
|
||
if len(agentData[hID][metricType]) > 0 {
|
||
continue
|
||
}
|
||
var value float64
|
||
hasFallbackValue := true
|
||
switch metricType {
|
||
case "cpu":
|
||
value = h.CPUPercent()
|
||
case "memory":
|
||
value = h.MemoryPercent()
|
||
case "disk":
|
||
value = h.DiskPercent()
|
||
default:
|
||
hasFallbackValue = false
|
||
}
|
||
if hasFallbackValue {
|
||
agentData[hID][metricType] = []MetricPoint{{Timestamp: currentTime, Value: value}}
|
||
}
|
||
}
|
||
normalizeInfrastructureSummaryChartSeries(agentData[hID], duration, currentTime)
|
||
}
|
||
|
||
countNodePoints := func(metricsMap map[string]NodeChartData) int {
|
||
total := 0
|
||
for _, metricSeries := range metricsMap {
|
||
for _, points := range metricSeries {
|
||
total += len(points)
|
||
}
|
||
}
|
||
return total
|
||
}
|
||
countChartPoints := func(metricsMap map[string]VMChartData) int {
|
||
total := 0
|
||
for _, metricSeries := range metricsMap {
|
||
for _, points := range metricSeries {
|
||
total += len(points)
|
||
}
|
||
}
|
||
return total
|
||
}
|
||
|
||
nodePoints := countNodePoints(nodeData)
|
||
dockerHostPoints := countChartPoints(dockerHostData)
|
||
agentPoints := countChartPoints(agentData)
|
||
|
||
response := EmptyInfrastructureChartsResponse()
|
||
response.NodeData = nodeData
|
||
response.DockerHostData = dockerHostData
|
||
response.AgentData = agentData
|
||
response.Timestamp = currentTime
|
||
response.Stats = ChartStats{
|
||
OldestDataTimestamp: oldestTimestamp,
|
||
Range: timeRange,
|
||
RangeSeconds: int64(duration / time.Second),
|
||
MetricsStoreEnabled: metricsStoreEnabled,
|
||
PrimarySourceHint: primarySourceHint,
|
||
InMemoryThresholdSecs: int64(inMemoryChartThreshold / time.Second),
|
||
PointCounts: ChartPointCounts{
|
||
Total: nodePoints + dockerHostPoints + agentPoints,
|
||
Nodes: nodePoints,
|
||
DockerHosts: dockerHostPoints,
|
||
Agents: agentPoints,
|
||
},
|
||
}
|
||
|
||
w.Header().Set("Content-Type", "application/json")
|
||
if err := json.NewEncoder(w).Encode(response.NormalizeCollections()); err != nil {
|
||
log.Error().Err(err).Msg("Failed to encode infrastructure chart data response")
|
||
http.Error(w, "Internal server error", http.StatusInternalServerError)
|
||
return
|
||
}
|
||
}
|
||
|
||
type workloadSummaryBuckets struct {
|
||
cpu []float64
|
||
memory []float64
|
||
disk []float64
|
||
network []float64
|
||
}
|
||
|
||
type workloadsSummarySnapshot struct {
|
||
id string
|
||
name string
|
||
cpu float64
|
||
memory float64
|
||
disk float64
|
||
network float64
|
||
}
|
||
|
||
func workloadSummaryBucketTimestamp(timestampMs int64) int64 {
|
||
const bucketSizeMs = int64(30_000)
|
||
return (timestampMs / bucketSizeMs) * bucketSizeMs
|
||
}
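
// Illustrative arithmetic for the 30s bucketing above: integer division
// truncates toward the epoch, so every millisecond timestamp inside the same
// half-minute collapses to one bucket key:
//
//	workloadSummaryBucketTimestamp(65_000) // 60_000
//	workloadSummaryBucketTimestamp(89_999) // 60_000
//	workloadSummaryBucketTimestamp(90_000) // 90_000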

func clampWorkloadPercent(value float64) float64 {
	if value != value {
		return 0
	}
	if value < 0 {
		return 0
	}
	if value > 100 {
		return 100
	}
	return value
}

func clampNonNegativeWorkloadValue(value float64) float64 {
	if value != value {
		return 0
	}
	if value < 0 {
		return 0
	}
	return value
}
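
// Note on the clamp helpers above: `value != value` is the standard float64
// NaN test (NaN is the only value not equal to itself), equivalent to
// math.IsNaN(value); NaN, like any negative input, is coerced to 0.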

func kubernetesClusterKey(cluster models.KubernetesCluster) string {
	if value := strings.TrimSpace(cluster.ID); value != "" {
		return value
	}
	if value := strings.TrimSpace(cluster.Name); value != "" {
		return value
	}
	if value := strings.TrimSpace(cluster.DisplayName); value != "" {
		return value
	}
	return "k8s-cluster"
}

func kubernetesPodIdentifier(pod models.KubernetesPod) string {
	if value := strings.TrimSpace(pod.UID); value != "" {
		return value
	}
	namespace := strings.TrimSpace(pod.Namespace)
	name := strings.TrimSpace(pod.Name)
	if namespace != "" || name != "" {
		return fmt.Sprintf("%s/%s", namespace, name)
	}
	return "pod"
}

func kubernetesPodMetricID(cluster models.KubernetesCluster, pod models.KubernetesPod) string {
	clusterKey := kubernetesClusterKey(cluster)
	podKey := kubernetesPodIdentifier(pod)
	if clusterKey == "" || podKey == "" {
		return ""
	}
	return fmt.Sprintf("k8s:%s:pod:%s", clusterKey, podKey)
}
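
// The resulting keys are stable, colon-delimited identifiers such as
// "k8s:<cluster>:pod:<uid>" or, when the pod reports no UID,
// "k8s:<cluster>:pod:<namespace>/<name>" (placeholders shown; the actual
// values come from the cluster and pod models handled above).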

func kubernetesPodMetricIDFromView(pod *unifiedresources.PodView) string {
	if pod == nil {
		return ""
	}
	clusterKey := strings.TrimSpace(pod.ClusterID())
	if clusterKey == "" {
		clusterKey = strings.TrimSpace(pod.ClusterName())
	}
	podKey := strings.TrimSpace(pod.PodUID())
	if podKey == "" {
		namespace := strings.TrimSpace(pod.Namespace())
		name := strings.TrimSpace(pod.Name())
		if namespace != "" || name != "" {
			podKey = fmt.Sprintf("%s/%s", namespace, name)
		}
	}
	if clusterKey == "" || podKey == "" {
		return ""
	}
	return fmt.Sprintf("k8s:%s:pod:%s", clusterKey, podKey)
}

func kubernetesPodDisplayName(pod models.KubernetesPod) string {
	name := strings.TrimSpace(pod.Name)
	namespace := strings.TrimSpace(pod.Namespace)
	if namespace == "" {
		if name == "" {
			return kubernetesPodIdentifier(pod)
		}
		return name
	}
	if name == "" {
		return namespace
	}
	return fmt.Sprintf("%s/%s", namespace, name)
}

func kubernetesPodIsRunning(pod models.KubernetesPod) bool {
	return strings.EqualFold(strings.TrimSpace(pod.Phase), "running")
}

func kubernetesPodCurrentMetrics(cluster models.KubernetesCluster, pod models.KubernetesPod) map[string]float64 {
	cpuPercent := clampWorkloadPercent(pod.UsageCPUPercent)
	memoryPercent := clampWorkloadPercent(pod.UsageMemoryPercent)

	if memoryPercent <= 0 && pod.UsageMemoryBytes > 0 {
		totalBytes := kubernetesPodMemoryTotalBytes(cluster, pod)
		if totalBytes > 0 {
			memoryPercent = clampWorkloadPercent((float64(pod.UsageMemoryBytes) / float64(totalBytes)) * 100)
		}
	}

	diskPercent := clampWorkloadPercent(pod.DiskUsagePercent)
	netIn := clampNonNegativeWorkloadValue(pod.NetInRate)
	netOut := clampNonNegativeWorkloadValue(pod.NetOutRate)

	return map[string]float64{
		"cpu":       cpuPercent,
		"memory":    memoryPercent,
		"disk":      diskPercent,
		"diskread":  0,
		"diskwrite": 0,
		"netin":     netIn,
		"netout":    netOut,
	}
}
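
// Worked example for the memory-percent fallback above (hypothetical
// numbers): a pod reporting UsageMemoryBytes of 512 MiB with no usable
// UsageMemoryPercent, on a node with 8 GiB allocatable, yields
// (536870912 / 8589934592) * 100 = 6.25%, then clamped to [0, 100].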

func kubernetesPodMemoryTotalBytes(cluster models.KubernetesCluster, pod models.KubernetesPod) int64 {
	nodeName := strings.TrimSpace(pod.NodeName)
	if nodeName == "" {
		return 0
	}
	for _, node := range cluster.Nodes {
		if !strings.EqualFold(strings.TrimSpace(node.Name), nodeName) {
			continue
		}
		if node.AllocMemoryBytes > 0 {
			return node.AllocMemoryBytes
		}
		if node.CapacityMemoryBytes > 0 {
			return node.CapacityMemoryBytes
		}
		return 0
	}
	return 0
}

func getOrCreateWorkloadBucket(buckets map[int64]*workloadSummaryBuckets, bucketTs int64) *workloadSummaryBuckets {
	if bucket, ok := buckets[bucketTs]; ok {
		return bucket
	}
	bucket := &workloadSummaryBuckets{}
	buckets[bucketTs] = bucket
	return bucket
}

func appendWorkloadMetricPoints(
	buckets map[int64]*workloadSummaryBuckets,
	points []monitoring.MetricPoint,
	target string,
	oldestTimestamp *int64,
) int {
	added := 0
	for _, point := range points {
		ts := point.Timestamp.UnixMilli()
		if ts <= 0 {
			continue
		}
		if ts < *oldestTimestamp {
			*oldestTimestamp = ts
		}
		bucketTs := workloadSummaryBucketTimestamp(ts)
		bucket := getOrCreateWorkloadBucket(buckets, bucketTs)
		value := clampNonNegativeWorkloadValue(point.Value)
		switch target {
		case "cpu", "memory", "disk":
			value = clampWorkloadPercent(value)
		}
		switch target {
		case "cpu":
			bucket.cpu = append(bucket.cpu, value)
		case "memory":
			bucket.memory = append(bucket.memory, value)
		case "disk":
			bucket.disk = append(bucket.disk, value)
		case "network":
			bucket.network = append(bucket.network, value)
		}
		added++
	}
	return added
}

func mergeWorkloadNetworkPoints(
	netIn []monitoring.MetricPoint,
	netOut []monitoring.MetricPoint,
) []monitoring.MetricPoint {
	totals := make(map[int64]float64)
	for _, point := range netIn {
		ts := point.Timestamp.UnixMilli()
		if ts <= 0 {
			continue
		}
		totals[ts] += clampNonNegativeWorkloadValue(point.Value)
	}
	for _, point := range netOut {
		ts := point.Timestamp.UnixMilli()
		if ts <= 0 {
			continue
		}
		totals[ts] += clampNonNegativeWorkloadValue(point.Value)
	}
	if len(totals) == 0 {
		return nil
	}
	keys := make([]int64, 0, len(totals))
	for ts := range totals {
		keys = append(keys, ts)
	}
	sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
	points := make([]monitoring.MetricPoint, 0, len(keys))
	for _, ts := range keys {
		points = append(points, monitoring.MetricPoint{
			Timestamp: time.UnixMilli(ts),
			Value:     totals[ts],
		})
	}
	return points
}
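
// Illustrative sketch of the merge above: points sharing a timestamp are
// summed, unmatched points pass through, and the result is sorted by time.
//
//	netIn:  {t=1000ms: 5}, {t=2000ms: 3}
//	netOut: {t=1000ms: 2}
//	merged: {t=1000ms: 7}, {t=2000ms: 3}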

func averageValue(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	sum := 0.0
	for _, value := range values {
		sum += value
	}
	return sum / float64(len(values))
}

func maxValue(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	max := values[0]
	for i := 1; i < len(values); i++ {
		if values[i] > max {
			max = values[i]
		}
	}
	return max
}

func buildWorkloadsSummaryMetric(
	buckets map[int64]*workloadSummaryBuckets,
	selector func(*workloadSummaryBuckets) []float64,
) WorkloadsSummaryMetricData {
	keys := make([]int64, 0, len(buckets))
	for ts := range buckets {
		keys = append(keys, ts)
	}
	sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })

	data := WorkloadsSummaryMetricData{
		P50: make([]MetricPoint, 0, len(keys)),
		P95: make([]MetricPoint, 0, len(keys)),
	}
	for _, ts := range keys {
		values := selector(buckets[ts])
		if len(values) == 0 {
			continue
		}
		data.P50 = append(data.P50, MetricPoint{
			Timestamp: ts,
			Value:     averageValue(values),
		})
		data.P95 = append(data.P95, MetricPoint{
			Timestamp: ts,
			Value:     maxValue(values),
		})
	}
	return data
}
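
// Note: despite the P50/P95 field names, the series built above carry the
// per-bucket average and per-bucket maximum respectively; presumably cheap
// stand-ins for true percentiles, which would require retaining and sorting
// every sample in each bucket.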

func summaryMetricPointCount(metric WorkloadsSummaryMetricData) int {
	return len(metric.P50) + len(metric.P95)
}

func normalizeWorkloadsSummaryMetricPointSeries(
	metric WorkloadsSummaryMetricData,
	duration time.Duration,
) WorkloadsSummaryMetricData {
	targetPoints := targetWorkloadsSummarySeriesPoints(duration)
	metric.P50 = capMetricPointSeries(metric.P50, targetPoints)
	metric.P95 = capMetricPointSeries(metric.P95, targetPoints)
	return metric
}

func latestSummaryMetricValue(points []monitoring.MetricPoint, fallback float64, clamp func(float64) float64) float64 {
	if len(points) == 0 {
		return clamp(fallback)
	}

	latest := points[0]
	for i := 1; i < len(points); i++ {
		if points[i].Timestamp.After(latest.Timestamp) {
			latest = points[i]
		}
	}
	return clamp(latest.Value)
}

func buildWorkloadsTopContributors(
	snapshots []workloadsSummarySnapshot,
	selector func(workloadsSummarySnapshot) float64,
) []WorkloadsSummaryContributor {
	contributors := make([]WorkloadsSummaryContributor, 0, len(snapshots))
	for _, snapshot := range snapshots {
		value := selector(snapshot)
		if value <= 0 {
			continue
		}
		contributors = append(contributors, WorkloadsSummaryContributor{
			ID:    snapshot.id,
			Name:  snapshot.name,
			Value: value,
		})
	}

	sort.Slice(contributors, func(i, j int) bool {
		if contributors[i].Value == contributors[j].Value {
			if contributors[i].Name == contributors[j].Name {
				return contributors[i].ID < contributors[j].ID
			}
			return contributors[i].Name < contributors[j].Name
		}
		return contributors[i].Value > contributors[j].Value
	})

	if len(contributors) > 3 {
		contributors = contributors[:3]
	}
	return contributors
}

func buildWorkloadsBlastRadius(
	snapshots []workloadsSummarySnapshot,
	selector func(workloadsSummarySnapshot) float64,
) WorkloadsSummaryBlastRadius {
	values := make([]float64, 0, len(snapshots))
	for _, snapshot := range snapshots {
		value := selector(snapshot)
		if value <= 0 {
			continue
		}
		values = append(values, value)
	}

	if len(values) == 0 {
		return WorkloadsSummaryBlastRadius{
			Scope:           "idle",
			Top3Share:       0,
			ActiveWorkloads: 0,
		}
	}

	sort.Slice(values, func(i, j int) bool { return values[i] > values[j] })
	total := 0.0
	for _, value := range values {
		total += value
	}

	topCount := 3
	if len(values) < topCount {
		topCount = len(values)
	}
	top3 := 0.0
	for i := 0; i < topCount; i++ {
		top3 += values[i]
	}

	share := 0.0
	if total > 0 {
		share = (top3 / total) * 100
	}

	scope := "distributed"
	switch {
	case share >= 80:
		scope = "concentrated"
	case share >= 55:
		scope = "mixed"
	}

	return WorkloadsSummaryBlastRadius{
		Scope:           scope,
		Top3Share:       share,
		ActiveWorkloads: len(values),
	}
}
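
// Worked example for the blast-radius classification above (hypothetical
// values): workloads at [50, 30, 10, 5, 5] give total=100, top3=90, so
// Top3Share=90% and scope "concentrated" (>=80); five equal workloads of 20
// give Top3Share=60% and scope "mixed" (>=55); anything below 55% is
// "distributed", and no positive values at all is "idle".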

// handleWorkloadsSummaryCharts serves compact, aggregate workload sparklines
// for the Workloads top cards. It intentionally avoids returning per-workload
// time series to keep payloads bounded for large fleets.
func (r *Router) handleWorkloadsSummaryCharts(w http.ResponseWriter, req *http.Request) {
	log.Debug().Str("method", req.Method).Str("url", req.URL.String()).Msg("Workloads summary charts endpoint hit")
	const inMemoryChartThreshold = 2 * time.Hour

	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	query := req.URL.Query()
	timeRange := query.Get("range")
	if timeRange == "" {
		timeRange = "1h"
	}
	selectedNodeID := strings.TrimSpace(query.Get("node"))
	duration := parseChartsRangeDuration(timeRange)

	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		http.Error(w, "Tenant monitor is not available", http.StatusInternalServerError)
		return
	}
	nodes := monitor.NodesSnapshot()
	readState := monitor.GetUnifiedReadStateOrSnapshot()
	if readState == nil {
		http.Error(w, "State unavailable", http.StatusInternalServerError)
		return
	}
	mockModeEnabled := mock.IsMockEnabled()
	metricsStoreEnabled := monitor.GetMetricsStore() != nil
	primarySourceHint := "memory"
	if metricsStoreEnabled && duration > inMemoryChartThreshold {
		primarySourceHint = "store_or_memory_fallback"
	}

	currentTime := time.Now().UnixMilli()
	currentTimeTime := time.UnixMilli(currentTime)
	oldestTimestamp := currentTime
	buckets := make(map[int64]*workloadSummaryBuckets)
	guestPointCount := 0
	guestCounts := WorkloadsGuestCounts{}
	snapshots := make([]workloadsSummarySnapshot, 0, len(readState.VMs())+len(readState.Containers()))

	var selectedNode *models.Node
	if selectedNodeID != "" {
		for idx := range nodes {
			if nodes[idx].ID == selectedNodeID {
				selectedNode = &nodes[idx]
				break
			}
		}
		if selectedNode == nil {
			log.Debug().
				Str("selectedNodeID", selectedNodeID).
				Msg("Workloads summary node filter not found in current state; falling back to global scope")
		}
	}

	matchesSelectedNode := func(instance, nodeName string) bool {
		if selectedNodeID == "" {
			return true
		}
		if selectedNode == nil {
			return true
		}
		return strings.EqualFold(strings.TrimSpace(instance), strings.TrimSpace(selectedNode.Instance)) &&
			strings.EqualFold(strings.TrimSpace(nodeName), strings.TrimSpace(selectedNode.Name))
	}

	matchesSelectedDockerHostView := func(host *unifiedresources.DockerHostView) bool {
		if selectedNodeID == "" {
			return true
		}
		if selectedNode == nil {
			return true
		}
		if host == nil {
			return false
		}
		nodeName := strings.TrimSpace(selectedNode.Name)
		if nodeName == "" {
			return false
		}
		return strings.EqualFold(strings.TrimSpace(host.Hostname()), nodeName) ||
			strings.EqualFold(strings.TrimSpace(host.Name()), nodeName)
	}

	matchesSelectedKubernetesPodView := func(pod *unifiedresources.PodView) bool {
		if selectedNodeID == "" {
			return true
		}
		if selectedNode == nil {
			return true
		}
		if pod == nil {
			return false
		}
		nodeName := strings.TrimSpace(selectedNode.Name)
		if nodeName == "" {
			return false
		}
		return strings.EqualFold(strings.TrimSpace(pod.NodeName()), nodeName)
	}

	vmList := make([]*unifiedresources.VMView, 0)
	vmResponseKeys := make([]string, 0)
	vmRequests := make([]monitoring.GuestChartRequest, 0)
	for _, vm := range readState.VMs() {
		if vm == nil {
			continue
		}
		if !matchesSelectedNode(vm.Instance(), vm.Node()) {
			continue
		}
		responseKey, request, ok := vmChartRequest(vm)
		if !ok {
			continue
		}
		vmList = append(vmList, vm)
		vmResponseKeys = append(vmResponseKeys, responseKey)
		vmRequests = append(vmRequests, request)
	}
	containerList := make([]*unifiedresources.ContainerView, 0)
	containerResponseKeys := make([]string, 0)
	containerRequests := make([]monitoring.GuestChartRequest, 0)
	for _, ct := range readState.Containers() {
		if ct == nil {
			continue
		}
		if !matchesSelectedNode(ct.Instance(), ct.Node()) {
			continue
		}
		responseKey, request, ok := systemContainerChartRequest(ct)
		if !ok {
			continue
		}
		containerList = append(containerList, ct)
		containerResponseKeys = append(containerResponseKeys, responseKey)
		containerRequests = append(containerRequests, request)
	}
	podList := make([]*unifiedresources.PodView, 0)
	podRequests := make([]monitoring.GuestChartRequest, 0)
	for _, pod := range readState.Pods() {
		if pod == nil {
			continue
		}
		if !matchesSelectedKubernetesPodView(pod) {
			continue
		}

		metricKey := kubernetesPodMetricIDFromView(pod)
		if metricKey == "" {
			continue
		}
		podList = append(podList, pod)
		podRequests = append(podRequests, monitoring.GuestChartRequest{InMemoryKey: metricKey, SQLResourceID: metricKey})
	}
	dockerHostsByID := make(map[string]*unifiedresources.DockerHostView, len(readState.DockerHosts()))
	for _, host := range readState.DockerHosts() {
		if host == nil {
			continue
		}
		dockerHostsByID[host.ID()] = host
	}

	dockerContainerList := make([]*unifiedresources.DockerContainerView, 0)
	dockerContainerRequests := make([]monitoring.GuestChartRequest, 0)
	for _, container := range readState.DockerContainers() {
		if container == nil {
			continue
		}
		if selectedNodeID != "" && selectedNode != nil {
			host := dockerHostsByID[container.ParentID()]
			if host == nil || !matchesSelectedDockerHostView(host) {
				continue
			}
		}
		containerID := strings.TrimSpace(container.ContainerID())
		if containerID == "" {
			continue
		}
		dockerContainerList = append(dockerContainerList, container)
		dockerContainerRequests = append(dockerContainerRequests, monitoring.GuestChartRequest{
			InMemoryKey:   fmt.Sprintf("docker:%s", containerID),
			SQLResourceID: containerID,
		})
	}
	var (
		vmBatchMetrics              map[string]map[string][]monitoring.MetricPoint
		containerBatchMetrics       map[string]map[string][]monitoring.MetricPoint
		podBatchMetrics             map[string]map[string][]monitoring.MetricPoint
		dockerContainerBatchMetrics map[string]map[string][]monitoring.MetricPoint
	)
	var workloadsSummaryBatchWG sync.WaitGroup
	workloadsSummaryBatchWG.Add(4)
	go func() {
		defer workloadsSummaryBatchWG.Done()
		vmBatchMetrics = monitor.GetGuestMetricsForChartBatch("vm", vmRequests, duration, workloadSummaryMetricOrder...)
	}()
	go func() {
		defer workloadsSummaryBatchWG.Done()
		containerBatchMetrics = monitor.GetGuestMetricsForChartBatch("container", containerRequests, duration, workloadSummaryMetricOrder...)
	}()
	go func() {
		defer workloadsSummaryBatchWG.Done()
		podBatchMetrics = monitor.GetGuestMetricsForChartBatch("k8s", podRequests, duration, workloadSummaryMetricOrder...)
	}()
	go func() {
		defer workloadsSummaryBatchWG.Done()
		dockerContainerBatchMetrics = monitor.GetGuestMetricsForChartBatch("dockerContainer", dockerContainerRequests, duration, workloadSummaryMetricOrder...)
	}()
	workloadsSummaryBatchWG.Wait()

	for idx, vm := range vmList {
		responseKey := vmResponseKeys[idx]
		metricID := vmRequests[idx].SQLResourceID
		guestCounts.Total++
		if workloadSummaryStatusIsRunning("", vm.Status()) {
			guestCounts.Running++
		} else {
			guestCounts.Stopped++
		}

		snapshot := workloadsSummarySnapshot{
			id:      responseKey,
			name:    strings.TrimSpace(vm.Name()),
			cpu:     clampWorkloadPercent(vm.CPUPercent()),
			memory:  clampWorkloadPercent(vm.MemoryPercent()),
			disk:    clampWorkloadPercent(vm.DiskPercent()),
			network: clampNonNegativeWorkloadValue(vm.NetIn() + vm.NetOut()),
		}
		if snapshot.name == "" {
			snapshot.name = responseKey
		}

		metrics := vmBatchMetrics[metricID]
		cpuPoints := metrics["cpu"]
		if len(cpuPoints) == 0 {
			cpuPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: vm.CPUPercent()}}
		}
		memoryPoints := metrics["memory"]
		if len(memoryPoints) == 0 {
			memoryPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: vm.MemoryPercent()}}
		}
		diskPoints := metrics["disk"]
		if len(diskPoints) == 0 {
			diskPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: vm.DiskPercent()}}
		}
		netInPoints := metrics["netin"]
		netOutPoints := metrics["netout"]
		if len(netInPoints) == 0 && len(netOutPoints) == 0 {
			netInPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: vm.NetIn()}}
			netOutPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: vm.NetOut()}}
		}

		networkPoints := mergeWorkloadNetworkPoints(netInPoints, netOutPoints)

		snapshot.cpu = latestSummaryMetricValue(cpuPoints, snapshot.cpu, clampWorkloadPercent)
		snapshot.memory = latestSummaryMetricValue(memoryPoints, snapshot.memory, clampWorkloadPercent)
		snapshot.disk = latestSummaryMetricValue(diskPoints, snapshot.disk, clampWorkloadPercent)
		snapshot.network = latestSummaryMetricValue(networkPoints, snapshot.network, clampNonNegativeWorkloadValue)

		guestPointCount += appendWorkloadMetricPoints(buckets, cpuPoints, "cpu", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, memoryPoints, "memory", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, diskPoints, "disk", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, networkPoints, "network", &oldestTimestamp)
		snapshots = append(snapshots, snapshot)
	}

	for idx, ct := range containerList {
		responseKey := containerResponseKeys[idx]
		metricID := containerRequests[idx].SQLResourceID
		guestCounts.Total++
		if workloadSummaryStatusIsRunning("", ct.Status()) {
			guestCounts.Running++
		} else {
			guestCounts.Stopped++
		}

		snapshot := workloadsSummarySnapshot{
			id:      responseKey,
			name:    strings.TrimSpace(ct.Name()),
			cpu:     clampWorkloadPercent(ct.CPUPercent()),
			memory:  clampWorkloadPercent(ct.MemoryPercent()),
			disk:    clampWorkloadPercent(ct.DiskPercent()),
			network: clampNonNegativeWorkloadValue(ct.NetIn() + ct.NetOut()),
		}
		if snapshot.name == "" {
			snapshot.name = responseKey
		}

		metrics := containerBatchMetrics[metricID]
		cpuPoints := metrics["cpu"]
		if len(cpuPoints) == 0 {
			cpuPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: ct.CPUPercent()}}
		}
		memoryPoints := metrics["memory"]
		if len(memoryPoints) == 0 {
			memoryPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: ct.MemoryPercent()}}
		}
		diskPoints := metrics["disk"]
		if len(diskPoints) == 0 {
			diskPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: ct.DiskPercent()}}
		}
		netInPoints := metrics["netin"]
		netOutPoints := metrics["netout"]
		if len(netInPoints) == 0 && len(netOutPoints) == 0 {
			netInPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: ct.NetIn()}}
			netOutPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: ct.NetOut()}}
		}

		networkPoints := mergeWorkloadNetworkPoints(netInPoints, netOutPoints)

		snapshot.cpu = latestSummaryMetricValue(cpuPoints, snapshot.cpu, clampWorkloadPercent)
		snapshot.memory = latestSummaryMetricValue(memoryPoints, snapshot.memory, clampWorkloadPercent)
		snapshot.disk = latestSummaryMetricValue(diskPoints, snapshot.disk, clampWorkloadPercent)
		snapshot.network = latestSummaryMetricValue(networkPoints, snapshot.network, clampNonNegativeWorkloadValue)

		guestPointCount += appendWorkloadMetricPoints(buckets, cpuPoints, "cpu", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, memoryPoints, "memory", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, diskPoints, "disk", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, networkPoints, "network", &oldestTimestamp)
		snapshots = append(snapshots, snapshot)
	}

	for _, pod := range podList {
		metricKey := kubernetesPodMetricIDFromView(pod)

		guestCounts.Total++
		if strings.EqualFold(pod.PodPhase(), "running") {
			guestCounts.Running++
		} else {
			guestCounts.Stopped++
		}

		snapshot := workloadsSummarySnapshot{
			id:      metricKey,
			name:    strings.TrimSpace(pod.Namespace()),
			cpu:     clampWorkloadPercent(pod.CPUPercent()),
			memory:  clampWorkloadPercent(pod.MemoryPercent()),
			disk:    clampWorkloadPercent(pod.DiskPercent()),
			network: clampNonNegativeWorkloadValue(pod.NetInRate() + pod.NetOutRate()),
		}
		if name := strings.TrimSpace(pod.Name()); name != "" {
			if snapshot.name == "" {
				snapshot.name = name
			} else {
				snapshot.name = fmt.Sprintf("%s/%s", snapshot.name, name)
			}
		}
		if snapshot.name == "" {
			snapshot.name = metricKey
		}

		metrics := podBatchMetrics[metricKey]
		cpuPoints := metrics["cpu"]
		if len(cpuPoints) == 0 {
			cpuPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: pod.CPUPercent()}}
		}
		memoryPoints := metrics["memory"]
		if len(memoryPoints) == 0 {
			memoryPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: pod.MemoryPercent()}}
		}
		diskPoints := metrics["disk"]
		if len(diskPoints) == 0 {
			diskPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: pod.DiskPercent()}}
		}
		netInPoints := metrics["netin"]
		if len(netInPoints) == 0 {
			netInPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: pod.NetInRate()}}
		}
		netOutPoints := metrics["netout"]
		if len(netOutPoints) == 0 {
			netOutPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: pod.NetOutRate()}}
		}

		if mockModeEnabled {
			if len(cpuPoints) < mockWorkloadMinSeriesPoints {
				cpuPoints = buildMockWorkloadMetricHistorySeries(currentTimeTime, duration, 0, "k8s", metricKey, "cpu", snapshot.cpu)
			}
			if len(memoryPoints) < mockWorkloadMinSeriesPoints {
				memoryPoints = buildMockWorkloadMetricHistorySeries(currentTimeTime, duration, 0, "k8s", metricKey, "memory", snapshot.memory)
			}
			if len(diskPoints) < mockWorkloadMinSeriesPoints {
				diskPoints = buildMockWorkloadMetricHistorySeries(currentTimeTime, duration, 0, "k8s", metricKey, "disk", snapshot.disk)
			}
			if len(netInPoints) < mockWorkloadMinSeriesPoints {
				netInPoints = buildMockWorkloadMetricHistorySeries(currentTimeTime, duration, 0, "k8s", metricKey, "netin", pod.NetInRate())
			}
			if len(netOutPoints) < mockWorkloadMinSeriesPoints {
				netOutPoints = buildMockWorkloadMetricHistorySeries(currentTimeTime, duration, 0, "k8s", metricKey, "netout", pod.NetOutRate())
			}
		}

		networkPoints := mergeWorkloadNetworkPoints(netInPoints, netOutPoints)

		snapshot.cpu = latestSummaryMetricValue(cpuPoints, snapshot.cpu, clampWorkloadPercent)
		snapshot.memory = latestSummaryMetricValue(memoryPoints, snapshot.memory, clampWorkloadPercent)
		snapshot.disk = latestSummaryMetricValue(diskPoints, snapshot.disk, clampWorkloadPercent)
		snapshot.network = latestSummaryMetricValue(networkPoints, snapshot.network, clampNonNegativeWorkloadValue)

		guestPointCount += appendWorkloadMetricPoints(buckets, cpuPoints, "cpu", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, memoryPoints, "memory", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, diskPoints, "disk", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, networkPoints, "network", &oldestTimestamp)
		snapshots = append(snapshots, snapshot)
	}

	for _, container := range dockerContainerList {
		containerID := strings.TrimSpace(container.ContainerID())
		guestCounts.Total++
		containerState := strings.TrimSpace(container.ContainerState())
		isRunning := workloadSummaryStatusIsRunning(containerState, container.Status())
		if !isRunning && containerState == "" {
			isRunning = container.CPUPercent() > 0 ||
				container.MemoryPercent() > 0 ||
				container.NetInRate() > 0 ||
				container.NetOutRate() > 0
		}
		if isRunning {
			guestCounts.Running++
		} else {
			guestCounts.Stopped++
		}

		snapshot := workloadsSummarySnapshot{
			id:      containerID,
			name:    strings.TrimSpace(container.Name()),
			cpu:     clampWorkloadPercent(container.CPUPercent()),
			memory:  clampWorkloadPercent(container.MemoryPercent()),
			disk:    clampWorkloadPercent(container.DiskPercent()),
			network: 0,
		}
		if snapshot.name == "" {
			snapshot.name = containerID
		}

		metrics := dockerContainerBatchMetrics[containerID]
		cpuPoints := metrics["cpu"]
		if len(cpuPoints) == 0 {
			cpuPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: container.CPUPercent()}}
		}
		memoryPoints := metrics["memory"]
		if len(memoryPoints) == 0 {
			memoryPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: container.MemoryPercent()}}
		}
		diskPoints := metrics["disk"]
		if len(diskPoints) == 0 {
			diskPoints = []monitoring.MetricPoint{{Timestamp: currentTimeTime, Value: container.DiskPercent()}}
		}
		netInPoints := metrics["netin"]
		netOutPoints := metrics["netout"]

		networkPoints := mergeWorkloadNetworkPoints(netInPoints, netOutPoints)

		snapshot.cpu = latestSummaryMetricValue(cpuPoints, snapshot.cpu, clampWorkloadPercent)
		snapshot.memory = latestSummaryMetricValue(memoryPoints, snapshot.memory, clampWorkloadPercent)
		snapshot.disk = latestSummaryMetricValue(diskPoints, snapshot.disk, clampWorkloadPercent)
		snapshot.network = latestSummaryMetricValue(networkPoints, snapshot.network, clampNonNegativeWorkloadValue)

		guestPointCount += appendWorkloadMetricPoints(buckets, cpuPoints, "cpu", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, memoryPoints, "memory", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, diskPoints, "disk", &oldestTimestamp)
		guestPointCount += appendWorkloadMetricPoints(buckets, networkPoints, "network", &oldestTimestamp)
		snapshots = append(snapshots, snapshot)
	}

	cpuMetric := buildWorkloadsSummaryMetric(buckets, func(bucket *workloadSummaryBuckets) []float64 {
		return bucket.cpu
	})
	memoryMetric := buildWorkloadsSummaryMetric(buckets, func(bucket *workloadSummaryBuckets) []float64 {
		return bucket.memory
	})
	diskMetric := buildWorkloadsSummaryMetric(buckets, func(bucket *workloadSummaryBuckets) []float64 {
		return bucket.disk
	})
	networkMetric := buildWorkloadsSummaryMetric(buckets, func(bucket *workloadSummaryBuckets) []float64 {
		return bucket.network
	})
	cpuMetric = normalizeWorkloadsSummaryMetricPointSeries(cpuMetric, duration)
	memoryMetric = normalizeWorkloadsSummaryMetricPointSeries(memoryMetric, duration)
	diskMetric = normalizeWorkloadsSummaryMetricPointSeries(diskMetric, duration)
	networkMetric = normalizeWorkloadsSummaryMetricPointSeries(networkMetric, duration)

	summaryPointCount := summaryMetricPointCount(cpuMetric) +
		summaryMetricPointCount(memoryMetric) +
		summaryMetricPointCount(diskMetric) +
		summaryMetricPointCount(networkMetric)

	topContributors := WorkloadsSummaryContributors{
		CPU: buildWorkloadsTopContributors(snapshots, func(snapshot workloadsSummarySnapshot) float64 {
			return snapshot.cpu
		}),
		Memory: buildWorkloadsTopContributors(snapshots, func(snapshot workloadsSummarySnapshot) float64 {
			return snapshot.memory
		}),
		Disk: buildWorkloadsTopContributors(snapshots, func(snapshot workloadsSummarySnapshot) float64 {
			return snapshot.disk
		}),
		Network: buildWorkloadsTopContributors(snapshots, func(snapshot workloadsSummarySnapshot) float64 {
			return snapshot.network
		}),
	}

	blastRadius := WorkloadsSummaryBlastRadiusGroup{
		CPU: buildWorkloadsBlastRadius(snapshots, func(snapshot workloadsSummarySnapshot) float64 {
			return snapshot.cpu
		}),
		Memory: buildWorkloadsBlastRadius(snapshots, func(snapshot workloadsSummarySnapshot) float64 {
			return snapshot.memory
		}),
		Disk: buildWorkloadsBlastRadius(snapshots, func(snapshot workloadsSummarySnapshot) float64 {
			return snapshot.disk
		}),
		Network: buildWorkloadsBlastRadius(snapshots, func(snapshot workloadsSummarySnapshot) float64 {
			return snapshot.network
		}),
	}

	response := EmptyWorkloadsSummaryChartsResponse()
	response.CPU = cpuMetric
	response.Memory = memoryMetric
	response.Disk = diskMetric
	response.Network = networkMetric
	response.GuestCounts = guestCounts
	response.TopContributors = topContributors
	response.BlastRadius = blastRadius
	response.Timestamp = currentTime
	response.Stats = ChartStats{
		OldestDataTimestamp:   oldestTimestamp,
		Range:                 timeRange,
		RangeSeconds:          int64(duration / time.Second),
		MetricsStoreEnabled:   metricsStoreEnabled,
		PrimarySourceHint:     primarySourceHint,
		InMemoryThresholdSecs: int64(inMemoryChartThreshold / time.Second),
		PointCounts: ChartPointCounts{
			Total:  summaryPointCount,
			Guests: guestPointCount,
		},
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response.NormalizeCollections()); err != nil {
		log.Error().Err(err).Msg("Failed to encode workloads summary chart data response")
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return
	}
}

func workloadSummaryStatusIsRunning(runtimeState string, status unifiedresources.ResourceStatus) bool {
	switch strings.ToLower(strings.TrimSpace(runtimeState)) {
	case "running", "online", "ok":
		return true
	case "stopped", "offline", "paused", "created", "dead", "exited":
		return false
	}

	switch status {
	case unifiedresources.StatusOnline:
		return true
	case unifiedresources.StatusOffline:
		return false
	}

	return false
}
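
// Note: a recognized runtime state string takes precedence above; when it is
// empty or unrecognized, the unified status decides, and any status other
// than StatusOnline (including unrecognized ones) counts as not running,
// which keeps the Running/Stopped split conservative.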

// handleStorageCharts returns pool capacity and physical disk temperature
// time-series for the storage summary sparklines.
func (r *Router) handleStorageCharts(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Parse query parameters
	query := req.URL.Query()
	rangeMinutes := 60 // default 1 hour
	if rangeStr := query.Get("range"); rangeStr != "" {
		if _, err := fmt.Sscanf(rangeStr, "%d", &rangeMinutes); err != nil {
			log.Warn().Err(err).Str("range", rangeStr).Msg("Invalid range parameter; using default")
		}
	}

	duration := time.Duration(rangeMinutes) * time.Minute
	selectedNodeID := strings.TrimSpace(query.Get("node"))

	// Use tenant-aware monitor
	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		http.Error(w, "Monitor not available", http.StatusInternalServerError)
		return
	}
	readState := monitor.GetUnifiedReadStateOrSnapshot()
	if readState == nil {
		http.Error(w, "State unavailable", http.StatusInternalServerError)
		return
	}

	// Resolve node filter from canonical unified resources so storage charts use
	// the same node identity model as the frontend storage page.
	var selectedNodeName, selectedNodeInstance string
	if selectedNodeID != "" {
		found := false
		for _, resource := range monitor.GetUnifiedResources() {
			if strings.TrimSpace(resource.ID) != selectedNodeID {
				continue
			}
			selectedNodeName = storageChartsSelectedNodeName(resource)
			selectedNodeInstance = storageChartsSelectedNodeInstance(resource)
			if selectedNodeName != "" || selectedNodeInstance != "" {
				found = true
				break
			}
		}
		if !found {
			for _, n := range monitor.NodesSnapshot() {
				if n.ID == selectedNodeID {
					selectedNodeName = n.Name
					selectedNodeInstance = n.Instance
					found = true
					break
				}
			}
		}
		if !found {
			log.Debug().
				Str("selectedNodeID", selectedNodeID).
				Msg("Storage charts node filter not found in current state; falling back to global scope")
		}
	}
	matchesNode := func(nodeName, instance string) bool {
		if selectedNodeName == "" {
			return true
		}
		if !strings.EqualFold(strings.TrimSpace(nodeName), selectedNodeName) {
			return false
		}
		if selectedNodeInstance != "" && instance != "" {
			return strings.EqualFold(strings.TrimSpace(instance), selectedNodeInstance)
		}
		return true
	}

	// Build pool chart data from the canonical storage summary batch path so
	// the dashboard and storage page share one efficient history retrieval model.
	poolNames := make(map[string]string, len(readState.StoragePools()))
	storageIDs := make([]string, 0, len(readState.StoragePools()))
	for _, sp := range readState.StoragePools() {
		if sp == nil {
			continue
		}
		if !matchesNode(sp.Node(), sp.Instance()) {
			continue
		}
		sid := sp.SourceID()
		if sid == "" {
			continue
		}
		poolNames[sid] = sp.Name()
		storageIDs = append(storageIDs, sid)
	}

	poolMetrics := monitor.GetStorageMetricsForChartBatch(storageIDs, duration)
	pools := make(map[string]StoragePoolChartData, len(storageIDs))
	for _, sid := range storageIDs {
		metrics := poolMetrics[sid]
		pools[sid] = StoragePoolChartData{
			Name:  poolNames[sid],
			Usage: monitorPointsToAPI(metrics["usage"]),
			Used:  monitorPointsToAPI(metrics["used"]),
			Avail: monitorPointsToAPI(metrics["avail"]),
		}
	}

	// Build disk temperature chart data
	diskEntries := monitor.GetPhysicalDiskTemperatureCharts(duration)
	disks := make(map[string]StorageDiskChartData, len(diskEntries))
	for id, entry := range diskEntries {
		if !matchesNode(entry.Node, entry.Instance) {
			continue
		}
		disks[id] = StorageDiskChartData{
			Name:        entry.Name,
			Node:        entry.Node,
			Temperature: monitorPointsToAPI(entry.Temperature),
		}
	}

	resp := EmptyStorageChartsResponse()
	resp.Pools = pools
	resp.Disks = disks

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(resp.NormalizeCollections()); err != nil {
		log.Error().Err(err).Msg("Failed to encode storage chart data")
		http.Error(w, "Internal server error", http.StatusInternalServerError)
	}
}

// handleStorageSummaryCharts serves a compact aggregate capacity trend for the
// dashboard storage card. It intentionally avoids returning per-pool and
// per-disk series so the dashboard does not overfetch the full storage page
// payload.
func (r *Router) handleStorageSummaryCharts(w http.ResponseWriter, req *http.Request) {
	const inMemoryChartThreshold = 2 * time.Hour

	if req.Method != http.MethodGet && req.Method != http.MethodHead {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	query := req.URL.Query()
	timeRange := query.Get("range")
	if timeRange == "" {
		timeRange = "24h"
	}
	duration := parseChartsRangeDuration(timeRange)

	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		http.Error(w, "Tenant monitor is not available", http.StatusInternalServerError)
		return
	}

	currentTime := time.Now().UnixMilli()
	capacity, oldestTimestamp := monitor.GetStorageSummaryCapacityTrend(duration)
	if oldestTimestamp == 0 {
		oldestTimestamp = currentTime
	}

	metricsStoreEnabled := monitor.GetMetricsStore() != nil
	primarySourceHint := "memory"
	if metricsStoreEnabled && duration > inMemoryChartThreshold {
		primarySourceHint = "store_or_memory_fallback"
	}

	resp := EmptyStorageSummaryTrendResponse()
	resp.Capacity = monitorPointsToAPI(capacity)
	resp.Timestamp = currentTime
	resp.Stats = ChartStats{
		OldestDataTimestamp:   oldestTimestamp,
		Range:                 timeRange,
		RangeSeconds:          int64(duration / time.Second),
		MetricsStoreEnabled:   metricsStoreEnabled,
		PrimarySourceHint:     primarySourceHint,
		InMemoryThresholdSecs: int64(inMemoryChartThreshold / time.Second),
		PointCounts: ChartPointCounts{
			Total:   len(resp.Capacity),
			Storage: len(resp.Capacity),
		},
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(resp.NormalizeCollections()); err != nil {
		log.Error().Err(err).Msg("Failed to encode storage summary chart data")
		http.Error(w, "Internal server error", http.StatusInternalServerError)
	}
}

// monitorPointsToAPI converts monitoring MetricPoints (time.Time timestamps)
// to API MetricPoints (Unix millisecond timestamps) for JSON serialization.
func monitorPointsToAPI(points []monitoring.MetricPoint) []MetricPoint {
	if len(points) == 0 {
		return nil
	}
	out := make([]MetricPoint, len(points))
	for i, p := range points {
		out[i] = MetricPoint{Timestamp: p.Timestamp.UnixMilli(), Value: p.Value}
	}
	return out
}

// handleMetricsStoreStats returns statistics about the persistent metrics store
func (r *Router) handleMetricsStoreStats(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Use tenant-aware monitor
	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		http.Error(w, "Monitor not available", http.StatusInternalServerError)
		return
	}

	store := monitor.GetMetricsStore()
	if store == nil {
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]interface{}{
			"enabled": false,
			"error":   "Persistent metrics store not initialized",
		})
		return
	}

	stats := store.GetStats()
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(map[string]interface{}{
		"enabled":       true,
		"dbSize":        stats.DBSize,
		"rawCount":      stats.RawCount,
		"minuteCount":   stats.MinuteCount,
		"hourlyCount":   stats.HourlyCount,
		"dailyCount":    stats.DailyCount,
		"totalWrites":   stats.TotalWrites,
		"bufferSize":    stats.BufferSize,
		"lastFlush":     stats.LastFlush,
		"lastRollup":    stats.LastRollup,
		"lastRetention": stats.LastRetention,
	}); err != nil {
		log.Error().Err(err).Msg("Failed to encode metrics store stats")
		http.Error(w, "Internal server error", http.StatusInternalServerError)
	}
}

// handleMetricsHistory returns historical metrics from the persistent SQLite store
// Query params:
//   - resourceType: "node", "agent", "vm", "system-container", "oci-container", "app-container",
//     "docker-host", "k8s", "storage", or "disk" (required)
//   - resourceId: the resource identifier (required)
//   - metric: "cpu", "memory", "disk", etc. (optional, omit for all metrics)
//   - range: time range like "1h", "24h", "7d", "30d", "90d" (optional, default "24h")
func (r *Router) handleMetricsHistory(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Use tenant-aware monitor
	monitor := r.getTenantMonitor(req.Context())
	if monitor == nil {
		http.Error(w, "Monitor not available", http.StatusInternalServerError)
		return
	}

	query := req.URL.Query()
	resourceTypeInput := strings.ToLower(strings.TrimSpace(query.Get("resourceType")))
	resourceID := query.Get("resourceId")
	metricType := query.Get("metric")
	timeRange := query.Get("range")

	if resourceTypeInput == "" || resourceID == "" {
		http.Error(w, "resourceType and resourceId are required", http.StatusBadRequest)
		return
	}
	// Normalize and validate query aliases to runtime/store resource types.
	responseResourceType, runtimeResourceType, storeResourceTypes, err := normalizeMetricsHistoryResourceType(resourceTypeInput)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	resourceID = canonicalizeMetricsHistoryResourceID(runtimeResourceType, resourceID)

	// Parse time range
	var duration time.Duration
	var stepSecs int64 = 0 // Default to no downsampling (use tier resolution)

	switch timeRange {
	case "30m":
		duration = 30 * time.Minute
	case "1h":
		duration = time.Hour
	case "6h":
		duration = 6 * time.Hour
	case "12h":
		duration = 12 * time.Hour
	case "24h", "1d", "":
		duration = 24 * time.Hour
	case "7d":
		duration = 7 * 24 * time.Hour
	case "30d":
		duration = 30 * 24 * time.Hour
	case "90d":
		duration = 90 * 24 * time.Hour
	default:
		// Try parsing as duration
		var err error
		duration, err = time.ParseDuration(timeRange)
		if err != nil {
			duration = 24 * time.Hour // Default to 24 hours
		}
	}

	// Optional downsampling based on requested max points.
	// When omitted, we return the native tier resolution.
	if maxPointsStr := query.Get("maxPoints"); maxPointsStr != "" {
		if maxPoints, err := strconv.Atoi(maxPointsStr); err == nil && maxPoints > 0 {
			durationSecs := int64(duration.Seconds())
			if durationSecs > 0 {
				stepSecs = (durationSecs + int64(maxPoints) - 1) / int64(maxPoints)
				if stepSecs <= 1 {
					stepSecs = 0
				} else {
					minStep := func(d time.Duration) int64 {
						switch {
						case d <= 2*time.Hour:
							return 5
						case d <= 24*time.Hour:
							return 60
						case d <= 7*24*time.Hour:
							return 3600
						default:
							return 86400
						}
					}
					if stepSecs < minStep(duration) {
						stepSecs = 0
					}
				}
			}
		}
	}
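
	// Worked example (hypothetical request): range=24h (86_400s) with
	// maxPoints=500 gives stepSecs = ceil(86_400/500) = 173; that is above the
	// 60s floor for ranges up to 24h, so the downsampled step is kept. With
	// maxPoints=5000 the step would be 18, below the floor, and stepSecs
	// resets to 0 (native tier resolution).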
|
||
|
||
// Enforce tier-aware history limits (e.g. Free=7d, Relay=14d, Pro=90d).
|
||
{
|
||
maxHistDays := freeHistoryDaysDefault
|
||
if r.licenseHandlers != nil {
|
||
if licSvc := r.licenseHandlers.Service(req.Context()); licSvc != nil {
|
||
status := licSvc.Status()
|
||
if status.Valid {
|
||
maxHistDays = tierHistoryDaysFromLicensing(status.Tier)
|
||
}
|
||
// When !status.Valid, maxHistDays stays at free-tier default.
|
||
}
|
||
}
|
||
maxDuration := time.Duration(maxHistDays) * 24 * time.Hour
|
||
if duration > maxDuration {
|
||
WriteLicenseRequired(w, featureLongTermMetricsValue, "Extended metrics history requires a higher-tier Pulse license")
|
||
return
|
||
}
|
||
}
|
||
|
||
end := time.Now()
|
||
start := end.Add(-duration)
|
||
|
||
const (
|
||
historySourceStore = "store"
|
||
historySourceMemory = "memory"
|
||
historySourceLive = "live"
|
||
historySourceMock = "mock_synthetic"
|
||
)
|
||
|
||
// Metric aliasing: storage metrics are stored under "usage", but some clients request "disk".
|
||
// Keep metricType unchanged for the response JSON; only alias the lookup/query key.
|
||
queryMetric := metricType
|
||
if runtimeResourceType == "storage" && metricType == "disk" {
|
||
queryMetric = "usage"
|
||
}
|
||
|
||
// Allow in-memory fallback for any requested range when the persistent store is empty.
|
||
// The in-memory history enforces its own retention limits, so it will naturally return
|
||
// whatever data is available (better than showing "Collecting data..." indefinitely).
|
||
fallbackAllowed := true
|
||
historyMaxPoints := parseWorkloadMaxPoints(query.Get("maxPoints"))
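	// buildHistoryPoints converts in-memory points to the API shape. With
	// bucketSecs > 1 it averages samples into fixed-width buckets stamped at
	// the bucket midpoint, carrying value (mean), min, and max; with
	// bucketSecs <= 1 each sample passes through unchanged.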
	buildHistoryPoints := func(points []monitoring.MetricPoint, bucketSecs int64) []map[string]interface{} {
		if len(points) == 0 {
			return []map[string]interface{}{}
		}
		if bucketSecs <= 1 {
			apiPoints := make([]map[string]interface{}, 0, len(points))
			for _, p := range points {
				apiPoints = append(apiPoints, map[string]interface{}{
					"timestamp": p.Timestamp.UnixMilli(),
					"value":     p.Value,
					"min":       p.Value,
					"max":       p.Value,
				})
			}
			return apiPoints
		}

		type bucket struct {
			sum   float64
			count int
			min   float64
			max   float64
		}

		buckets := make(map[int64]*bucket)
		for _, p := range points {
			ts := p.Timestamp.Unix()
			if ts <= 0 {
				continue
			}
			start := (ts / bucketSecs) * bucketSecs
			b, ok := buckets[start]
			if !ok {
				b = &bucket{
					sum:   p.Value,
					count: 1,
					min:   p.Value,
					max:   p.Value,
				}
				buckets[start] = b
				continue
			}
			b.sum += p.Value
			b.count++
			if p.Value < b.min {
				b.min = p.Value
			}
			if p.Value > b.max {
				b.max = p.Value
			}
		}

		keys := make([]int64, 0, len(buckets))
		for k := range buckets {
			keys = append(keys, k)
		}
		sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })

		apiPoints := make([]map[string]interface{}, 0, len(keys))
		for _, k := range keys {
			b := buckets[k]
			if b.count == 0 {
				continue
			}
			ts := time.Unix(k+(bucketSecs/2), 0)
			apiPoints = append(apiPoints, map[string]interface{}{
				"timestamp": ts.UnixMilli(),
				"value":     b.sum / float64(b.count),
				"min":       b.min,
				"max":       b.max,
			})
		}
		return apiPoints
	}
	// Most requests are served from the metrics store. Keep the fallback/read-state
	// snapshots lazy so store-backed history does not pay an O(fleet-size) copy cost.
	var (
		fallbackStateOnce sync.Once
		vms               []models.VM
		containers        []models.Container
		nodes             []models.Node
		storagePools      []models.Storage
		dockerHosts       []models.DockerHost
		hosts             []models.Host

		diskFallbackOnce sync.Once
		physicalDisks    []unifiedresources.Resource
	)

	loadFallbackState := func() {
		fallbackStateOnce.Do(func() {
			vms = monitor.VMsSnapshot()
			containers = monitor.ContainersSnapshot()
			nodes = monitor.NodesSnapshot()
			storagePools = monitor.StorageSnapshot()
			dockerHosts = monitor.DockerHostsSnapshot()
			hosts = monitor.HostsSnapshot()
		})
	}

	loadPhysicalDisks := func() {
		diskFallbackOnce.Do(func() {
			for _, resource := range monitor.GetUnifiedResources() {
				if resource.Type == unifiedresources.ResourceTypePhysicalDisk && resource.PhysicalDisk != nil {
					physicalDisks = append(physicalDisks, resource)
				}
			}
		})
	}
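
	// Guest IDs take the "instance:node:vmid" form (a hypothetical example:
	// "pve1:node1:101"). parseGuestID splits that form so the finders below
	// can match on components when the direct ID lookup misses.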
	parseGuestID := func(id string) (string, string, int, bool) {
		parts := strings.Split(id, ":")
		if len(parts) != 3 {
			return "", "", 0, false
		}
		vmID, err := strconv.Atoi(parts[2])
		if err != nil {
			return "", "", 0, false
		}
		return parts[0], parts[1], vmID, true
	}

	findVM := func(id string) *models.VM {
		loadFallbackState()
		for i := range vms {
			if vms[i].ID == id {
				return &vms[i]
			}
		}
		if instance, node, vmID, ok := parseGuestID(id); ok {
			for i := range vms {
				vm := &vms[i]
				if vm.VMID == vmID && vm.Node == node && vm.Instance == instance {
					return vm
				}
			}
		}
		return nil
	}

	findContainer := func(id string) *models.Container {
		loadFallbackState()
		for i := range containers {
			if containers[i].ID == id {
				return &containers[i]
			}
		}
		if instance, node, vmID, ok := parseGuestID(id); ok {
			for i := range containers {
				ct := &containers[i]
				if ct.VMID == vmID && ct.Node == node && ct.Instance == instance {
					return ct
				}
			}
		}
		return nil
	}

	findNode := func(id string) *models.Node {
		loadFallbackState()
		for i := range nodes {
			if nodes[i].ID == id {
				return &nodes[i]
			}
		}
		return nil
	}

	findStorage := func(id string) *models.Storage {
		loadFallbackState()
		for i := range storagePools {
			if storagePools[i].ID == id {
				return &storagePools[i]
			}
		}
		return nil
	}

	findDockerHost := func(id string) *models.DockerHost {
		loadFallbackState()
		for i := range dockerHosts {
			if dockerHosts[i].ID == id {
				return &dockerHosts[i]
			}
		}
		return nil
	}

	findHost := func(id string) *models.Host {
		loadFallbackState()
		for i := range hosts {
			if hosts[i].ID == id {
				return &hosts[i]
			}
		}
		return nil
	}

	findDockerContainer := func(id string) *models.DockerContainer {
		loadFallbackState()
		for i := range dockerHosts {
			host := &dockerHosts[i]
			for j := range host.Containers {
				if host.Containers[j].ID == id {
					return &host.Containers[j]
				}
			}
		}
		return nil
	}

	findDisk := func(id string) *unifiedresources.Resource {
		loadPhysicalDisks()
		target := strings.TrimSpace(id)
		if target == "" {
			return nil
		}

		for i := range physicalDisks {
			disk := &physicalDisks[i]
			serial := strings.TrimSpace(disk.PhysicalDisk.Serial)
			wwn := strings.TrimSpace(disk.PhysicalDisk.WWN)
			metricsTargetID := ""
			if disk.MetricsTarget != nil {
				metricsTargetID = strings.TrimSpace(disk.MetricsTarget.ResourceID)
			}
			if strings.EqualFold(disk.ID, target) ||
				(metricsTargetID != "" && strings.EqualFold(metricsTargetID, target)) ||
				(serial != "" && strings.EqualFold(serial, target)) ||
				(wwn != "" && strings.EqualFold(wwn, target)) {
				return disk
			}
		}
		return nil
	}

	liveMetricPoints := func(resourceType, resourceID string) map[string]monitoring.MetricPoint {
		now := time.Now()
		points := make(map[string]monitoring.MetricPoint)

		switch resourceType {
		case "vm":
			vm := findVM(resourceID)
			if vm == nil {
				return points
			}
			points["cpu"] = monitoring.MetricPoint{Timestamp: now, Value: vm.CPU}
			points["memory"] = monitoring.MetricPoint{Timestamp: now, Value: vm.Memory.Usage}
			if vm.Disk.Usage >= 0 {
				points["disk"] = monitoring.MetricPoint{Timestamp: now, Value: vm.Disk.Usage}
			}
			points["diskread"] = monitoring.MetricPoint{Timestamp: now, Value: float64(vm.DiskRead)}
			points["diskwrite"] = monitoring.MetricPoint{Timestamp: now, Value: float64(vm.DiskWrite)}
			points["netin"] = monitoring.MetricPoint{Timestamp: now, Value: float64(vm.NetworkIn)}
			points["netout"] = monitoring.MetricPoint{Timestamp: now, Value: float64(vm.NetworkOut)}
		case "system-container", "oci-container":
			ct := findContainer(resourceID)
			if ct == nil {
				return points
			}
			points["cpu"] = monitoring.MetricPoint{Timestamp: now, Value: ct.CPU}
			points["memory"] = monitoring.MetricPoint{Timestamp: now, Value: ct.Memory.Usage}
			if ct.Disk.Usage >= 0 {
				points["disk"] = monitoring.MetricPoint{Timestamp: now, Value: ct.Disk.Usage}
			}
			points["diskread"] = monitoring.MetricPoint{Timestamp: now, Value: float64(ct.DiskRead)}
			points["diskwrite"] = monitoring.MetricPoint{Timestamp: now, Value: float64(ct.DiskWrite)}
			points["netin"] = monitoring.MetricPoint{Timestamp: now, Value: float64(ct.NetworkIn)}
			points["netout"] = monitoring.MetricPoint{Timestamp: now, Value: float64(ct.NetworkOut)}
		case "node":
			node := findNode(resourceID)
			if node == nil {
				return points
			}
			points["cpu"] = monitoring.MetricPoint{Timestamp: now, Value: node.CPU}
			points["memory"] = monitoring.MetricPoint{Timestamp: now, Value: node.Memory.Usage}
			points["disk"] = monitoring.MetricPoint{Timestamp: now, Value: node.Disk.Usage}
		case "storage":
			storage := findStorage(resourceID)
			if storage == nil {
				return points
			}
			usagePercent := float64(0)
			if storage.Total > 0 {
				usagePercent = (float64(storage.Used) / float64(storage.Total)) * 100
			}
			points["disk"] = monitoring.MetricPoint{Timestamp: now, Value: usagePercent}
			points["usage"] = monitoring.MetricPoint{Timestamp: now, Value: usagePercent}
			points["used"] = monitoring.MetricPoint{Timestamp: now, Value: float64(storage.Used)}
			points["total"] = monitoring.MetricPoint{Timestamp: now, Value: float64(storage.Total)}
			points["avail"] = monitoring.MetricPoint{Timestamp: now, Value: float64(storage.Free)}
		case "docker-host":
			host := findDockerHost(resourceID)
			if host == nil {
				return points
			}
			points["cpu"] = monitoring.MetricPoint{Timestamp: now, Value: host.CPUUsage}
			points["memory"] = monitoring.MetricPoint{Timestamp: now, Value: host.Memory.Usage}
			diskPercent := float64(0)
			if len(host.Disks) > 0 {
				diskPercent = host.Disks[0].Usage
			}
			points["disk"] = monitoring.MetricPoint{Timestamp: now, Value: diskPercent}
		case "agent":
			host := findHost(resourceID)
			if host != nil {
				points["cpu"] = monitoring.MetricPoint{Timestamp: now, Value: host.CPUUsage}
				points["memory"] = monitoring.MetricPoint{Timestamp: now, Value: host.Memory.Usage}
				diskPercent := float64(0)
				if len(host.Disks) > 0 {
					diskPercent = host.Disks[0].Usage
				}
				points["disk"] = monitoring.MetricPoint{Timestamp: now, Value: diskPercent}
				// Note: We intentionally don't include netin/netout here because the host model
				// only has cumulative RXBytes/TXBytes (total since boot), not rates.
				// The RateTracker in ApplyHostReport calculates rates and stores them in metrics history.
				// Showing cumulative bytes as if they were rates would be misleading (showing GB instead of KB/s).
				return points
			}
			node := findNode(resourceID)
			if node == nil {
				return points
			}
			points["cpu"] = monitoring.MetricPoint{Timestamp: now, Value: node.CPU}
			points["memory"] = monitoring.MetricPoint{Timestamp: now, Value: node.Memory.Usage}
			points["disk"] = monitoring.MetricPoint{Timestamp: now, Value: node.Disk.Usage}
		case "app-container":
			container := findDockerContainer(resourceID)
			if container == nil {
				return points
			}
			points["cpu"] = monitoring.MetricPoint{Timestamp: now, Value: container.CPUPercent}
			points["memory"] = monitoring.MetricPoint{Timestamp: now, Value: container.MemoryPercent}
			if container.RootFilesystemBytes > 0 && container.WritableLayerBytes > 0 {
				diskPercent := float64(container.WritableLayerBytes) / float64(container.RootFilesystemBytes) * 100
				if diskPercent > 100 {
					diskPercent = 100
				}
				points["disk"] = monitoring.MetricPoint{Timestamp: now, Value: diskPercent}
			}
		case "disk":
			disk := findDisk(resourceID)
			if disk == nil || disk.PhysicalDisk == nil {
				return points
			}
			pd := disk.PhysicalDisk
			if pd.Temperature > 0 {
				points["smart_temp"] = monitoring.MetricPoint{Timestamp: now, Value: float64(pd.Temperature)}
			}
			if pd.SMART != nil {
				s := pd.SMART
				if s.PowerOnHours > 0 {
					points["smart_power_on_hours"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.PowerOnHours)}
				}
				if s.PowerCycles > 0 {
					points["smart_power_cycles"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.PowerCycles)}
				}
				if s.ReallocatedSectors > 0 {
					points["smart_reallocated_sectors"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.ReallocatedSectors)}
				}
				if s.PendingSectors > 0 {
					points["smart_pending_sectors"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.PendingSectors)}
				}
				if s.OfflineUncorrectable > 0 {
					points["smart_offline_uncorrectable"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.OfflineUncorrectable)}
				}
				if s.UDMACRCErrors > 0 {
					points["smart_crc_errors"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.UDMACRCErrors)}
				}
				if s.PercentageUsed > 0 {
					points["smart_percentage_used"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.PercentageUsed)}
				}
				if s.AvailableSpare > 0 {
					points["smart_available_spare"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.AvailableSpare)}
				}
				if s.MediaErrors > 0 {
					points["smart_media_errors"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.MediaErrors)}
				}
				if s.UnsafeShutdowns > 0 {
					points["smart_unsafe_shutdowns"] = monitoring.MetricPoint{Timestamp: now, Value: float64(s.UnsafeShutdowns)}
				}
			}
		}

		return points
	}
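
	// Fallback ladder used when the persistent store is missing or empty:
	// in-memory history first, then a single "live" point from the current
	// snapshot, and, for mock-mode disks, a synthetic series so charts are
	// never blank.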
	fallbackSingle := func() ([]map[string]interface{}, string, bool) {
		if !fallbackAllowed || metricType == "" {
			return nil, "", false
		}

		if mock.IsMockEnabled() && runtimeResourceType == "disk" {
			current := 0.0
			if disk := findDisk(resourceID); disk != nil && disk.PhysicalDisk != nil && metricType == "smart_temp" {
				current = float64(disk.PhysicalDisk.Temperature)
			}
			if current > 0 || metricType == "disk" || metricType == "diskread" || metricType == "diskwrite" {
				series := buildSyntheticMetricHistorySeries(
					end,
					duration,
					historyMaxPoints,
					"disk",
					resourceID,
					metricType,
					current,
				)
				if len(series) > 0 {
					return buildHistoryPoints(series, stepSecs), historySourceMock, true
				}
			}
		}

		switch runtimeResourceType {
		case "vm", "system-container", "oci-container":
			metrics := monitor.GetGuestMetrics(resourceID, duration)
			points := metrics[metricType]
			if len(points) == 0 {
				livePoints := liveMetricPoints(runtimeResourceType, resourceID)
				if live, ok := livePoints[metricType]; ok {
					return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
				}
				return nil, "", false
			}
			return buildHistoryPoints(points, stepSecs), historySourceMemory, true
		case "docker-host":
			metrics := monitor.GetGuestMetrics(fmt.Sprintf("dockerHost:%s", resourceID), duration)
			points := metrics[metricType]
			if len(points) == 0 {
				livePoints := liveMetricPoints(runtimeResourceType, resourceID)
				if live, ok := livePoints[metricType]; ok {
					return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
				}
				return nil, "", false
			}
			return buildHistoryPoints(points, stepSecs), historySourceMemory, true
		case "agent":
			metrics := monitor.GetGuestMetrics(fmt.Sprintf("agent:%s", resourceID), duration)
			points := metrics[metricType]
			if len(points) == 0 {
				points = monitor.GetNodeMetrics(resourceID, metricType, duration)
			}
			if len(points) == 0 {
				livePoints := liveMetricPoints(runtimeResourceType, resourceID)
				if live, ok := livePoints[metricType]; ok {
					return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
				}
				return nil, "", false
			}
			return buildHistoryPoints(points, stepSecs), historySourceMemory, true
		case "app-container":
			metrics := monitor.GetGuestMetrics(fmt.Sprintf("docker:%s", resourceID), duration)
			points := metrics[metricType]
			if len(points) == 0 {
				livePoints := liveMetricPoints(runtimeResourceType, resourceID)
				if live, ok := livePoints[metricType]; ok {
					return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
				}
				return nil, "", false
			}
			return buildHistoryPoints(points, stepSecs), historySourceMemory, true
		case "k8s":
			metrics := monitor.GetGuestMetrics(resourceID, duration)
			points := metrics[metricType]
			if len(points) == 0 {
				livePoints := liveMetricPoints(runtimeResourceType, resourceID)
				if live, ok := livePoints[metricType]; ok {
					return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
				}
				return nil, "", false
			}
			return buildHistoryPoints(points, stepSecs), historySourceMemory, true
		case "node":
			points := monitor.GetNodeMetrics(resourceID, metricType, duration)
			if len(points) == 0 {
				livePoints := liveMetricPoints(runtimeResourceType, resourceID)
				if live, ok := livePoints[metricType]; ok {
					return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
				}
				return nil, "", false
			}
			return buildHistoryPoints(points, stepSecs), historySourceMemory, true
		case "storage":
			metrics := monitor.GetStorageMetrics(resourceID, duration)
			points := metrics[queryMetric]
			if len(points) == 0 {
				livePoints := liveMetricPoints(runtimeResourceType, resourceID)
				if live, ok := livePoints[metricType]; ok {
					return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
				}
				return nil, "", false
			}
			return buildHistoryPoints(points, stepSecs), historySourceMemory, true
		case "disk":
			points := monitor.GetDiskMetricsForChart(resourceID, queryMetric, duration)
			if len(points) == 0 {
				livePoints := liveMetricPoints(runtimeResourceType, resourceID)
				if live, ok := livePoints[metricType]; ok {
					return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
				}
				return nil, "", false
			}
			return buildHistoryPoints(points, stepSecs), historySourceMemory, true
		default:
			livePoints := liveMetricPoints(runtimeResourceType, resourceID)
			if live, ok := livePoints[metricType]; ok {
				return buildHistoryPoints([]monitoring.MetricPoint{live}, 0), historySourceLive, true
			}
			return nil, "", false
		}
	}

	fallbackAll := func() (map[string][]map[string]interface{}, string, bool) {
		if !fallbackAllowed || metricType != "" {
			return nil, "", false
		}

		var metrics map[string][]monitoring.MetricPoint
		switch runtimeResourceType {
		case "vm", "system-container", "oci-container", "k8s":
			metrics = monitor.GetGuestMetrics(resourceID, duration)
		case "docker-host":
			metrics = monitor.GetGuestMetrics(fmt.Sprintf("dockerHost:%s", resourceID), duration)
		case "agent":
			metrics = monitor.GetGuestMetrics(fmt.Sprintf("agent:%s", resourceID), duration)
			if len(metrics) == 0 {
				metrics = map[string][]monitoring.MetricPoint{
					"cpu":    monitor.GetNodeMetrics(resourceID, "cpu", duration),
					"memory": monitor.GetNodeMetrics(resourceID, "memory", duration),
					"disk":   monitor.GetNodeMetrics(resourceID, "disk", duration),
				}
			}
		case "app-container":
			metrics = monitor.GetGuestMetrics(fmt.Sprintf("docker:%s", resourceID), duration)
		case "storage":
			metrics = monitor.GetStorageMetrics(resourceID, duration)
		case "disk":
			metrics = map[string][]monitoring.MetricPoint{
				"disk":       monitor.GetDiskMetricsForChart(resourceID, "disk", duration),
				"diskread":   monitor.GetDiskMetricsForChart(resourceID, "diskread", duration),
				"diskwrite":  monitor.GetDiskMetricsForChart(resourceID, "diskwrite", duration),
				"smart_temp": monitor.GetDiskMetricsForChart(resourceID, "smart_temp", duration),
			}
		case "node":
			metrics = map[string][]monitoring.MetricPoint{
				"cpu":    monitor.GetNodeMetrics(resourceID, "cpu", duration),
				"memory": monitor.GetNodeMetrics(resourceID, "memory", duration),
				"disk":   monitor.GetNodeMetrics(resourceID, "disk", duration),
			}
		default:
			return nil, "", false
		}

		apiData := make(map[string][]map[string]interface{})
		source := historySourceMemory
		for metric, points := range metrics {
			if len(points) == 0 {
				continue
			}
			apiData[metric] = buildHistoryPoints(points, stepSecs)
		}
		if len(apiData) == 0 {
			livePoints := liveMetricPoints(runtimeResourceType, resourceID)
			for metric, point := range livePoints {
				apiData[metric] = buildHistoryPoints([]monitoring.MetricPoint{point}, 0)
			}
			source = historySourceLive
		}
		if len(apiData) == 0 {
			return nil, "", false
		}
		return apiData, source, true
	}
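
	// Some resource types map to several store types (e.g. "agent" probes
	// "agent" then "node"); these helpers try each in order and return the
	// first non-empty result.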
	store := monitor.GetMetricsStore()
	queryStoreMetric := func(metric string) ([]metricstore.MetricPoint, string, error) {
		if len(storeResourceTypes) == 0 {
			return nil, runtimeResourceType, nil
		}
		for _, storeType := range storeResourceTypes {
			points, err := store.Query(storeType, resourceID, metric, start, end, stepSecs)
			if err != nil {
				return nil, storeType, err
			}
			if len(points) > 0 {
				return points, storeType, nil
			}
		}
		return nil, storeResourceTypes[0], nil
	}
	queryStoreAllMetrics := func() (map[string][]metricstore.MetricPoint, string, error) {
		if len(storeResourceTypes) == 0 {
			return nil, runtimeResourceType, nil
		}
		for _, storeType := range storeResourceTypes {
			metricsMap, err := store.QueryAll(storeType, resourceID, start, end, stepSecs)
			if err != nil {
				return nil, storeType, err
			}
			if len(metricsMap) > 0 {
				return metricsMap, storeType, nil
			}
		}
		return nil, storeResourceTypes[0], nil
	}
	if store == nil {
		if metricType != "" {
			if apiPoints, source, ok := fallbackSingle(); ok {
				log.Warn().
					Str("resourceType", runtimeResourceType).
					Str("resourceId", resourceID).
					Str("metric", metricType).
					Str("source", source).
					Msg("Metrics store unavailable; serving history from fallback source")
				response := map[string]interface{}{
					"resourceType": responseResourceType,
					"resourceId":   resourceID,
					"metric":       metricType,
					"range":        timeRange,
					"start":        start.UnixMilli(),
					"end":          end.UnixMilli(),
					"points":       apiPoints,
					"source":       source,
				}
				w.Header().Set("Content-Type", "application/json")
				json.NewEncoder(w).Encode(response)
				return
			}
		} else {
			if apiData, source, ok := fallbackAll(); ok {
				log.Warn().
					Str("resourceType", runtimeResourceType).
					Str("resourceId", resourceID).
					Str("source", source).
					Msg("Metrics store unavailable; serving history from fallback source")
				response := map[string]interface{}{
					"resourceType": responseResourceType,
					"resourceId":   resourceID,
					"range":        timeRange,
					"start":        start.UnixMilli(),
					"end":          end.UnixMilli(),
					"metrics":      apiData,
					"source":       source,
				}
				w.Header().Set("Content-Type", "application/json")
				json.NewEncoder(w).Encode(response)
				return
			}
		}

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusServiceUnavailable)
		json.NewEncoder(w).Encode(map[string]interface{}{
			"error": "Persistent metrics store not available",
		})
		return
	}

	var response interface{}

	if metricType != "" {
		source := historySourceStore
		// Query single metric type
		points, storeTypeUsed, err := queryStoreMetric(queryMetric)
		if err != nil {
			log.Error().Err(err).
				Str("resourceType", runtimeResourceType).
				Str("storeType", storeTypeUsed).
				Str("resourceId", resourceID).
				Str("metric", metricType).
				Msg("Failed to query metrics history")
			http.Error(w, "Failed to query metrics", http.StatusInternalServerError)
			return
		}

		if len(points) == 0 {
			if apiPoints, fallbackSource, ok := fallbackSingle(); ok {
				source = fallbackSource
				log.Info().
					Str("resourceType", runtimeResourceType).
					Str("resourceId", resourceID).
					Str("metric", metricType).
					Str("source", source).
					Msg("Metrics store empty; serving history from fallback source")
				response = map[string]interface{}{
					"resourceType": responseResourceType,
					"resourceId":   resourceID,
					"metric":       metricType,
					"range":        timeRange,
					"start":        start.UnixMilli(),
					"end":          end.UnixMilli(),
					"points":       apiPoints,
					"source":       source,
				}
			}
		}

		if response == nil && mock.IsMockEnabled() && runtimeResourceType == "disk" &&
			(metricType == "smart_temp" || metricType == "disk" || metricType == "diskread" || metricType == "diskwrite") {
			targetPoints := targetMockSeriesPoints(duration, historyMaxPoints)
			if len(points) > 0 && len(points) < targetPoints {
				current := points[len(points)-1].Value
				if metricType == "smart_temp" {
					if disk := findDisk(resourceID); disk != nil && disk.PhysicalDisk != nil && disk.PhysicalDisk.Temperature > 0 {
						current = float64(disk.PhysicalDisk.Temperature)
					}
				}
				if metricType != "smart_temp" || current > 0 {
					series := buildSyntheticMetricHistorySeries(
						end,
						duration,
						historyMaxPoints,
						"disk",
						resourceID,
						metricType,
						current,
					)
					if len(series) > len(points) {
						source = historySourceMock
						response = map[string]interface{}{
							"resourceType": responseResourceType,
							"resourceId":   resourceID,
							"metric":       metricType,
							"range":        timeRange,
							"start":        start.UnixMilli(),
							"end":          end.UnixMilli(),
							"points":       buildHistoryPoints(series, stepSecs),
							"source":       source,
						}
					}
				}
			}
		}

		// Convert to frontend format (timestamps in milliseconds)
		if response == nil {
			apiPoints := make([]map[string]interface{}, len(points))
			for i, p := range points {
				apiPoints[i] = map[string]interface{}{
					"timestamp": p.Timestamp.UnixMilli(),
					"value":     p.Value,
					"min":       p.Min,
					"max":       p.Max,
				}
			}

			response = map[string]interface{}{
				"resourceType": responseResourceType,
				"resourceId":   resourceID,
				"metric":       metricType,
				"range":        timeRange,
				"start":        start.UnixMilli(),
				"end":          end.UnixMilli(),
				"points":       apiPoints,
				"source":       source,
			}
		}
	} else {
		source := historySourceStore
		// Query all metrics for this resource
		metricsMap, storeTypeUsed, err := queryStoreAllMetrics()
		if err != nil {
			log.Error().Err(err).
				Str("resourceType", runtimeResourceType).
				Str("storeType", storeTypeUsed).
				Str("resourceId", resourceID).
				Msg("Failed to query all metrics history")
			http.Error(w, "Failed to query metrics", http.StatusInternalServerError)
			return
		}

		if len(metricsMap) == 0 {
			if apiData, fallbackSource, ok := fallbackAll(); ok {
				source = fallbackSource
				log.Info().
					Str("resourceType", runtimeResourceType).
					Str("resourceId", resourceID).
					Str("source", source).
					Msg("Metrics store empty; serving history from fallback source")
				response = map[string]interface{}{
					"resourceType": responseResourceType,
					"resourceId":   resourceID,
					"range":        timeRange,
					"start":        start.UnixMilli(),
					"end":          end.UnixMilli(),
					"metrics":      apiData,
					"source":       source,
				}
			}
		}

		// Convert to frontend format
		if response == nil {
			apiData := make(map[string][]map[string]interface{})
			for metric, points := range metricsMap {
				apiPoints := make([]map[string]interface{}, len(points))
				for i, p := range points {
					apiPoints[i] = map[string]interface{}{
						"timestamp": p.Timestamp.UnixMilli(),
						"value":     p.Value,
						"min":       p.Min,
						"max":       p.Max,
					}
				}
				apiData[metric] = apiPoints
			}

			response = map[string]interface{}{
				"resourceType": responseResourceType,
				"resourceId":   resourceID,
				"range":        timeRange,
				"start":        start.UnixMilli(),
				"end":          end.UnixMilli(),
				"metrics":      apiData,
				"source":       source,
			}
		}
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		log.Error().Err(err).Msg("Failed to encode metrics history response")
		http.Error(w, "Internal server error", http.StatusInternalServerError)
	}
}
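
// canonicalizeMetricsHistoryResourceID normalizes a caller-supplied resource
// ID. Only Kubernetes pod IDs need rewriting; everything else is just trimmed.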
func canonicalizeMetricsHistoryResourceID(runtimeResourceType, resourceID string) string {
	trimmed := strings.TrimSpace(resourceID)
	if runtimeResourceType != "k8s" {
		return trimmed
	}
	if strings.Contains(trimmed, ":pod:") {
		return unifiedresources.CanonicalKubernetesPodMetricID(trimmed)
	}
	return trimmed
}
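
// normalizeMetricsHistoryResourceType maps an API resourceType alias to the
// type echoed in the response, the type used for runtime lookups, and the
// ordered list of persistent-store types to probe.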
func normalizeMetricsHistoryResourceType(input string) (responseType string, runtimeType string, storeTypes []string, err error) {
	switch strings.ToLower(strings.TrimSpace(input)) {
	case "node":
		return "node", "node", []string{"node"}, nil
	case "storage":
		return "storage", "storage", []string{"storage"}, nil
	case "agent":
		return "agent", "agent", []string{"agent", "node"}, nil
	case "disk":
		return "disk", "disk", []string{"disk"}, nil
	case "k8s":
		return "k8s", "k8s", []string{"k8s"}, nil
	case "vm":
		return "vm", "vm", []string{"vm"}, nil
	case "system-container":
		return "system-container", "system-container", []string{"container"}, nil
	case "oci-container":
		return "oci-container", "oci-container", []string{"container"}, nil
	case "app-container":
		return "app-container", "app-container", []string{"dockerContainer", "docker"}, nil
	case "docker-host":
		return "docker-host", "docker-host", []string{"dockerHost"}, nil
	default:
		return "", "", nil, fmt.Errorf("unsupported resourceType %q", input)
	}
}

// handleConfig handles configuration requests
func (r *Router) handleConfig(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	config.Mu.RLock()
	defer config.Mu.RUnlock()

	// Return public configuration
	payload := map[string]interface{}{
		"csrfProtection":    false, // Not implemented yet
		"autoUpdateEnabled": config.EffectiveAutoUpdateEnabled(r.config.UpdateChannel, r.config.AutoUpdateEnabled),
		"updateChannel":     config.EffectiveUpdateChannel(r.config.UpdateChannel, ""),
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(payload)
}

// handleWebSocket handles WebSocket connections
func (r *Router) handleWebSocket(w http.ResponseWriter, req *http.Request) {
	// Check authentication before allowing WebSocket upgrade
	if !CheckAuth(r.config, w, req) {
		return
	}
	// SECURITY: Ensure monitoring:read scope for WebSocket connections.
	// This prevents tokens with only agent scopes from accessing full infra state via requestData.
	if !ensureScope(w, req, config.ScopeMonitoringRead) {
		return
	}

	boundReq, ok := bindWebSocketOrgToTenantContext(w, req)
	if !ok {
		return
	}

	r.wsHub.HandleWebSocket(w, boundReq)
}

// handleSimpleStats serves a simple stats page
func (r *Router) handleSimpleStats(w http.ResponseWriter, req *http.Request) {
	nonce := CSPNonceFromContext(req.Context())
	nonceAttr := ""
	if nonce != "" {
		nonceAttr = ` nonce="` + nonce + `"`
	}

	html := `<!DOCTYPE html>
<html>
<head>
    <title>Simple Pulse Stats</title>
    <style` + nonceAttr + `>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
            background: #f5f5f5;
        }
        table {
            width: 100%;
            border-collapse: collapse;
            background: white;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }
        th {
            background: #333;
            color: white;
            font-weight: bold;
            position: sticky;
            top: 0;
        }
        tr:hover {
            background: #f5f5f5;
        }
        .status {
            padding: 4px 8px;
            border-radius: 4px;
            color: white;
            font-size: 12px;
        }
        .running { background: #28a745; }
        .stopped { background: #dc3545; }
        #status {
            margin-bottom: 20px;
            padding: 10px;
            background: #e9ecef;
            border-radius: 4px;
            display: flex;
            justify-content: space-between;
            align-items: center;
        }
        .update-indicator {
            display: inline-block;
            width: 10px;
            height: 10px;
            background: #28a745;
            border-radius: 50%;
            animation: pulse 0.5s ease-out;
        }
        @keyframes pulse {
            0% { transform: scale(1); opacity: 1; }
            50% { transform: scale(1.5); opacity: 0.7; }
            100% { transform: scale(1); opacity: 1; }
        }
        .update-timer {
            font-family: monospace;
            font-size: 14px;
            color: #666;
        }
        .metric {
            font-family: monospace;
            text-align: right;
        }
        #update-indicator { display: none; }
    </style>
</head>
<body>
    <h1>Simple Pulse Stats</h1>
    <div id="status">
        <div>
            <span id="status-text">Connecting...</span>
            <span class="update-indicator" id="update-indicator"></span>
        </div>
        <div class="update-timer" id="update-timer"></div>
    </div>

    <h2>Containers</h2>
    <table id="containers">
        <thead>
            <tr>
                <th>Name</th>
                <th>Status</th>
                <th>CPU %</th>
                <th>Memory</th>
                <th>Disk Read</th>
                <th>Disk Write</th>
                <th>Net In</th>
                <th>Net Out</th>
            </tr>
        </thead>
        <tbody></tbody>
    </table>

    <script` + nonceAttr + `>
        let ws;
        let lastUpdateTime = null;
        let updateCount = 0;
        let updateInterval = null;

        function formatBytes(bytes) {
            if (!bytes || bytes < 0) return '0 B/s';
            const units = ['B/s', 'KB/s', 'MB/s', 'GB/s'];
            let i = 0;
            let value = bytes;
            while (value >= 1024 && i < units.length - 1) {
                value /= 1024;
                i++;
            }
            return value.toFixed(1) + ' ' + units[i];
        }

        function formatMemory(used, total) {
            const usedGB = (used / 1024 / 1024 / 1024).toFixed(1);
            const totalGB = (total / 1024 / 1024 / 1024).toFixed(1);
            const percent = ((used / total) * 100).toFixed(0);
            return usedGB + '/' + totalGB + ' GB (' + percent + '%)';
        }

        function updateTable(containers) {
            const tbody = document.querySelector('#containers tbody');
            tbody.innerHTML = '';

            containers.sort((a, b) => a.name.localeCompare(b.name));

            containers.forEach(ct => {
                const row = document.createElement('tr');
                row.innerHTML =
                    '<td><strong>' + ct.name + '</strong></td>' +
                    '<td><span class="status ' + ct.status + '">' + ct.status + '</span></td>' +
                    '<td class="metric">' + (ct.cpu ? ct.cpu.toFixed(1) : '0.0') + '%</td>' +
                    '<td class="metric">' + formatMemory(ct.mem || 0, ct.maxmem || 1) + '</td>' +
                    '<td class="metric">' + formatBytes(ct.diskread) + '</td>' +
                    '<td class="metric">' + formatBytes(ct.diskwrite) + '</td>' +
                    '<td class="metric">' + formatBytes(ct.netin) + '</td>' +
                    '<td class="metric">' + formatBytes(ct.netout) + '</td>';
                tbody.appendChild(row);
            });
        }

        function updateTimer() {
            if (lastUpdateTime) {
                const secondsSince = Math.floor((Date.now() - lastUpdateTime) / 1000);
                document.getElementById('update-timer').textContent = 'Next update in: ' + (2 - (secondsSince % 2)) + 's';
            }
        }

        function connect() {
            const statusText = document.getElementById('status-text');
            const indicator = document.getElementById('update-indicator');
            statusText.textContent = 'Connecting to WebSocket...';

            ws = new WebSocket('ws://' + window.location.host + '/ws');

            ws.onopen = function() {
                statusText.textContent = 'Connected! Updates every 2 seconds';
                console.log('WebSocket connected');
                // Start the countdown timer
                if (updateInterval) clearInterval(updateInterval);
                updateInterval = setInterval(updateTimer, 100);
            };

            ws.onmessage = function(event) {
                try {
                    const msg = JSON.parse(event.data);

                    if (msg.type === 'initialState' || msg.type === 'rawData') {
                        if (msg.data && msg.data.containers) {
                            updateCount++;
                            lastUpdateTime = Date.now();

                            // Show update indicator with animation
                            indicator.style.display = 'inline-block';
                            indicator.style.animation = 'none';
                            setTimeout(() => {
                                indicator.style.animation = 'pulse 0.5s ease-out';
                            }, 10);

                            statusText.textContent = 'Update #' + updateCount + ' at ' + new Date().toLocaleTimeString();
                            updateTable(msg.data.containers);
                        }
                    }
                } catch (err) {
                    console.error('Parse error:', err);
                }
            };

            ws.onclose = function(event) {
                statusText.textContent = 'Disconnected: ' + event.code + ' ' + event.reason + '. Reconnecting in 3s...';
                indicator.style.display = 'none';
                if (updateInterval) clearInterval(updateInterval);
                setTimeout(connect, 3000);
            };

            ws.onerror = function(error) {
                statusText.textContent = 'Connection error. Retrying...';
                console.error('WebSocket error:', error);
            };
        }

        // Start connection
        connect();
    </script>
</body>
</html>`

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	w.Write([]byte(html))
}
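
// resolveExplicitWebSocketOrgID reports an org ID the client supplied
// explicitly, checking the X-Pulse-Org-ID header, the org cookie, and the
// org_id query parameter in that order.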
func resolveExplicitWebSocketOrgID(req *http.Request) (string, bool) {
	if req == nil {
		return "", false
	}

	if headerOrg := strings.TrimSpace(req.Header.Get("X-Pulse-Org-ID")); headerOrg != "" {
		return headerOrg, true
	}

	if cookie, err := req.Cookie(CookieNameOrgID); err == nil {
		if cookieOrg := strings.TrimSpace(cookie.Value); cookieOrg != "" {
			return cookieOrg, true
		}
	}

	if queryOrg := strings.TrimSpace(req.URL.Query().Get("org_id")); queryOrg != "" {
		return queryOrg, true
	}

	return "", false
}
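
// bindWebSocketOrgToTenantContext pins a WebSocket request to the org ID
// already resolved into its context. An explicitly supplied org must match
// the context org exactly; mismatches are rejected rather than rebound.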
func bindWebSocketOrgToTenantContext(w http.ResponseWriter, req *http.Request) (*http.Request, bool) {
	if req == nil {
		http.Error(w, "Invalid request", http.StatusBadRequest)
		return nil, false
	}

	contextOrgID := strings.TrimSpace(GetOrgID(req.Context()))
	if contextOrgID == "" {
		contextOrgID = "default"
	}

	if requestedOrgID, explicit := resolveExplicitWebSocketOrgID(req); explicit {
		if !isValidOrganizationID(requestedOrgID) {
			http.Error(w, "Invalid organization ID", http.StatusBadRequest)
			return nil, false
		}
		if requestedOrgID != contextOrgID {
			http.Error(w, "Unauthorized organization context", http.StatusForbidden)
			return nil, false
		}
	}

	cloned := req.Clone(req.Context())
	cloned.Header = req.Header.Clone()
	cloned.Header.Set("X-Pulse-Org-ID", contextOrgID)
	return cloned, true
}

// forwardUpdateProgress forwards update progress to WebSocket clients
func (r *Router) forwardUpdateProgress() {
	progressChan := r.updateManager.GetProgressChannel()

	for status := range progressChan {
		// Create update event for WebSocket
		message := websocket.Message{
			Type:      "update:progress",
			Data:      status,
			Timestamp: time.Now().Format(time.RFC3339),
		}

		// Broadcast to all connected clients
		if r.wsHub != nil {
			r.wsHub.BroadcastMessage(message)
		}

		// Log progress
		log.Debug().
			Str("status", status.Status).
			Int("progress", status.Progress).
			Str("message", status.Message).
			Msg("Update progress")
	}
}

// backgroundUpdateChecker periodically checks for updates and caches the result
func (r *Router) backgroundUpdateChecker(ctx context.Context) {
	if ctx == nil {
		ctx = context.Background()
	}

	// Delay initial check to allow WebSocket clients to receive welcome messages first
	startupDelay := time.NewTimer(1 * time.Second)
	defer startupDelay.Stop()

	select {
	case <-ctx.Done():
		return
	case <-startupDelay.C:
	}

	if _, err := r.updateManager.CheckForUpdates(ctx); err != nil {
		log.Debug().Err(err).Msg("Initial update check failed")
	} else {
		log.Info().Msg("Initial update check completed")
	}

	// Then check every hour
	ticker := time.NewTicker(1 * time.Hour)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			if _, err := r.updateManager.CheckForUpdates(ctx); err != nil {
				log.Debug().Err(err).Msg("Periodic update check failed")
			} else {
				log.Debug().Msg("Periodic update check completed")
			}
		}
	}
}

type checksumCacheEntry struct {
	checksum string
	modTime  time.Time
	size     int64
}
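
// cachedSHA256 returns the SHA-256 digest of the file at filePath, reusing a
// cached value while the file's size and modification time are unchanged.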
func (r *Router) cachedSHA256(filePath string, info os.FileInfo) (string, error) {
	if filePath == "" {
		return "", fmt.Errorf("empty file path")
	}

	if info == nil {
		var err error
		info, err = os.Stat(filePath)
		if err != nil {
			return "", err
		}
	}

	r.checksumMu.RLock()
	entry, ok := r.checksumCache[filePath]
	r.checksumMu.RUnlock()
	if ok && entry.size == info.Size() && entry.modTime.Equal(info.ModTime()) {
		return entry.checksum, nil
	}

	file, err := os.Open(filePath)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hasher := sha256.New()
	if _, err := io.Copy(hasher, file); err != nil {
		return "", err
	}

	checksum := hex.EncodeToString(hasher.Sum(nil))

	r.checksumMu.Lock()
	if r.checksumCache == nil {
		r.checksumCache = make(map[string]checksumCacheEntry)
	}
	r.checksumCache[filePath] = checksumCacheEntry{
		checksum: checksum,
		modTime:  info.ModTime(),
		size:     info.Size(),
	}
	r.checksumMu.Unlock()

	return checksum, nil
}

// serveChecksum computes and serves the SHA256 checksum of a file
func (r *Router) serveChecksum(w http.ResponseWriter, filePath string) {
	info, err := os.Stat(filePath)
	if err != nil {
		http.Error(w, "Failed to stat file", http.StatusInternalServerError)
		return
	}

	checksum, err := r.cachedSHA256(filePath, info)
	if err != nil {
		http.Error(w, "Failed to compute checksum", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "text/plain")
	fmt.Fprintf(w, "%s\n", checksum)
}
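
// handleDiagnosticsDockerPrepareToken mints and persists an API token scoped
// to docker:report for migrating a known container runtime agent, then
// returns the token alongside an install command and a systemd unit snippet.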
func (r *Router) handleDiagnosticsDockerPrepareToken(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodPost {
		writeErrorResponse(w, http.StatusMethodNotAllowed, "method_not_allowed", "Only POST is allowed", nil)
		return
	}

	var payload struct {
		AgentID   string `json:"agentId"`
		TokenName string `json:"tokenName"`
	}

	if err := json.NewDecoder(req.Body).Decode(&payload); err != nil {
		writeErrorResponse(w, http.StatusBadRequest, "invalid_json", "Failed to decode request body", nil)
		return
	}

	agentID := strings.TrimSpace(payload.AgentID)
	if agentID == "" {
		writeErrorResponse(w, http.StatusBadRequest, "missing_agent_id", "Agent ID (agentId) is required", nil)
		return
	}

	orgID := strings.TrimSpace(GetOrgID(req.Context()))

	monitor := r.getTenantMonitor(req.Context())
	if orgID != "" && orgID != "default" {
		// Security-sensitive endpoint: do not fall back to the default monitor for tenant-scoped requests.
		if r.mtMonitor == nil {
			writeErrorResponse(w, http.StatusInternalServerError, "tenant_unavailable", "Tenant monitor is not configured", nil)
			return
		}
		tenantMonitor, err := r.mtMonitor.GetMonitor(orgID)
		if err != nil || tenantMonitor == nil {
			writeErrorResponse(w, http.StatusInternalServerError, "tenant_unavailable", "Failed to resolve tenant monitor", nil)
			return
		}
		monitor = tenantMonitor
	}
	if monitor == nil {
		writeErrorResponse(w, http.StatusInternalServerError, "monitor_unavailable", "Monitor is not configured", nil)
		return
	}

	readState := monitor.GetUnifiedReadStateOrSnapshot()
	if readState == nil {
		writeErrorResponse(w, http.StatusInternalServerError, "read_state_unavailable", "Container runtime state is not available", nil)
		return
	}

	var host *unifiedresources.DockerHostView
	for _, candidate := range readState.DockerHosts() {
		if candidate == nil {
			continue
		}
		if candidate.HostSourceID() == agentID || candidate.ID() == agentID {
			host = candidate
			break
		}
	}
	if host == nil {
		writeErrorResponse(w, http.StatusNotFound, "agent_not_found", "Container runtime not found", nil)
		return
	}
	hostID := host.HostSourceID()
	if hostID == "" {
		hostID = host.ID()
	}

	name := strings.TrimSpace(payload.TokenName)
	if name == "" {
		displayName := preferredDockerHostName(host)
		name = fmt.Sprintf("Container runtime: %s", displayName)
	}

	rawToken, err := auth.GenerateAPIToken()
	if err != nil {
		log.Error().Err(err).Msg("Failed to generate container runtime migration token")
		writeErrorResponse(w, http.StatusInternalServerError, "token_generation_failed", "Failed to generate API token", nil)
		return
	}

	record, err := config.NewAPITokenRecord(rawToken, name, []string{config.ScopeDockerReport})
	if err != nil {
		log.Error().Err(err).Msg("Failed to construct token record for container runtime migration")
		writeErrorResponse(w, http.StatusInternalServerError, "token_generation_failed", "Failed to generate API token", nil)
		return
	}
	record.OrgID = orgID
	if record.OrgID == "" {
		record.OrgID = "default"
	}

	activeConfig := r.config
	activePersistence := r.persistence
	if orgID != "" && orgID != "default" {
		activeConfig = monitor.GetConfig()
		if activeConfig == nil {
			writeErrorResponse(w, http.StatusInternalServerError, "tenant_config_unavailable", "Tenant config is not available", nil)
			return
		}
		if r.multiTenant == nil {
			writeErrorResponse(w, http.StatusInternalServerError, "tenant_persistence_unavailable", "Tenant persistence is not configured", nil)
			return
		}
		tenantPersistence, err := r.multiTenant.GetPersistence(orgID)
		if err != nil {
			writeErrorResponse(w, http.StatusInternalServerError, "tenant_persistence_unavailable", "Failed to resolve tenant persistence", nil)
			return
		}
		activePersistence = tenantPersistence
	}
	if activeConfig == nil {
		writeErrorResponse(w, http.StatusInternalServerError, "config_unavailable", "Configuration is not loaded", nil)
		return
	}

	config.Mu.Lock()
	activeConfig.APITokens = append(activeConfig.APITokens, *record)
	activeConfig.SortAPITokens()

	if activePersistence != nil {
		if err := activePersistence.SaveAPITokens(activeConfig.APITokens); err != nil {
			activeConfig.RemoveAPIToken(record.ID)
			config.Mu.Unlock()
			log.Error().Err(err).Msg("Failed to persist API tokens after container runtime migration token generation")
			writeErrorResponse(w, http.StatusInternalServerError, "token_persist_failed", "Failed to persist API token", nil)
			return
		}
	}
	config.Mu.Unlock()

	baseURL := normalizeAgentInstallBaseURL(r.resolvePublicURL(req))
	installCommand := buildContainerRuntimeAgentInstallCommand(baseURL, rawToken)
	systemdSnippet := fmt.Sprintf("[Service]\nType=simple\nEnvironment=\"PULSE_URL=%s\"\nEnvironment=\"PULSE_TOKEN=%s\"\nExecStart=/usr/local/bin/pulse-agent --url %s --token %s --enable-docker --enable-host=false --interval 30s\nRestart=always\nRestartSec=5s\nUser=root", baseURL, rawToken, baseURL, rawToken)

	response := map[string]any{
		"success": true,
		"token":   rawToken,
		"record":  toAPITokenDTO(*record),
		"agent": map[string]any{
			"id":   hostID,
			"name": preferredDockerHostName(host),
		},
		"installCommand":        installCommand,
		"systemdServiceSnippet": systemdSnippet,
		"pulseURL":              baseURL,
	}

	if err := utils.WriteJSONResponse(w, response); err != nil {
		log.Error().Err(err).Msg("Failed to serialize container runtime token migration response")
	}
}
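
// resolvePublicURL picks the externally reachable base URL for agent
// installs: AgentConnectURL first, then PublicURL, then (outside hosted mode)
// the request's scheme and host, and finally a localhost default.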
func (r *Router) resolvePublicURL(req *http.Request) string {
	// Hosted mode must never fall back to request host or localhost.
	// A canonical externally-reachable URL must be configured via PublicURL / AgentConnectURL.
	if r != nil && r.hostedMode {
		if agentConnectURL := strings.TrimSpace(r.config.AgentConnectURL); agentConnectURL != "" {
			return strings.TrimRight(agentConnectURL, "/")
		}
		if publicURL := strings.TrimSpace(r.config.PublicURL); publicURL != "" {
			return strings.TrimRight(publicURL, "/")
		}
		return ""
	}

	if agentConnectURL := strings.TrimSpace(r.config.AgentConnectURL); agentConnectURL != "" {
		return strings.TrimRight(agentConnectURL, "/")
	}

	if publicURL := strings.TrimSpace(r.config.PublicURL); publicURL != "" {
		return strings.TrimRight(publicURL, "/")
	}

	scheme := "http"
	if req != nil {
		if req.TLS != nil {
			scheme = "https"
		} else if proto := req.Header.Get("X-Forwarded-Proto"); strings.EqualFold(proto, "https") {
			scheme = "https"
		}
	}

	host := ""
	if req != nil {
		host = strings.TrimSpace(req.Host)
	}
	if host == "" {
		if r.config.FrontendPort > 0 {
			host = fmt.Sprintf("localhost:%d", r.config.FrontendPort)
		} else {
			host = "localhost:7655"
		}
	}

	return fmt.Sprintf("%s://%s", scheme, host)
}

func fileExists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}

// knowledgeStoreProviderWrapper adapts knowledge.Store to tools.KnowledgeStoreProvider.
type knowledgeStoreProviderWrapper struct {
	store *knowledge.Store
}

func (w *knowledgeStoreProviderWrapper) SaveNote(resourceID, note, category string) error {
	if w.store == nil {
		return fmt.Errorf("knowledge store not available")
	}
	// Use resourceID as both guestID and guestName, with a generic type and category
	return w.store.SaveNote(resourceID, resourceID, "resource", category, "Note", note)
}

func (w *knowledgeStoreProviderWrapper) GetKnowledge(resourceID string, category string) []tools.KnowledgeEntry {
	if w.store == nil {
		return nil
	}

	guestKnowledge, err := w.store.GetKnowledge(resourceID)
	if err != nil || guestKnowledge == nil {
		return nil
	}

	var result []tools.KnowledgeEntry

	// If category is specified, only get notes from that category
	if category != "" {
		notes, err := w.store.GetNotesByCategory(resourceID, category)
		if err != nil {
			return nil
		}
		for _, note := range notes {
			result = append(result, tools.KnowledgeEntry{
				ID:         note.ID,
				ResourceID: resourceID,
				Note:       note.Content,
				Category:   note.Category,
				CreatedAt:  note.CreatedAt,
				UpdatedAt:  note.UpdatedAt,
			})
		}
		return result
	}

	// Otherwise return all notes
	for _, note := range guestKnowledge.Notes {
		result = append(result, tools.KnowledgeEntry{
			ID:         note.ID,
			ResourceID: resourceID,
			Note:       note.Content,
			Category:   note.Category,
			CreatedAt:  note.CreatedAt,
			UpdatedAt:  note.UpdatedAt,
		})
	}
	return result
}
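
// mockSupplementalRecordsAdapter exposes mock-mode supplemental records as a
// unifiedresources data source; it only serves the "default" org.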
type mockSupplementalRecordsAdapter struct {
	source unifiedresources.DataSource
}

func (a mockSupplementalRecordsAdapter) GetCurrentRecords() []unifiedresources.IngestRecord {
	return a.GetCurrentRecordsForOrg("default")
}

func (a mockSupplementalRecordsAdapter) GetCurrentRecordsForOrg(orgID string) []unifiedresources.IngestRecord {
	if strings.TrimSpace(orgID) != "" && strings.TrimSpace(orgID) != "default" {
		return nil
	}
	return mock.SupplementalRecords(a.source)
}

func (a mockSupplementalRecordsAdapter) SupplementalRecords(_ *monitoring.Monitor, orgID string) []unifiedresources.IngestRecord {
	return a.GetCurrentRecordsForOrg(orgID)
}

func (a mockSupplementalRecordsAdapter) SnapshotOwnedSources() []unifiedresources.DataSource {
	normalized := normalizeDataSourceAlias(a.source)
	if normalized == "" {
		return nil
	}
	return []unifiedresources.DataSource{normalized}
}

func (a mockSupplementalRecordsAdapter) SnapshotOwnedSourcesForOrg(string) []unifiedresources.DataSource {
	return a.SnapshotOwnedSources()
}

func (a mockSupplementalRecordsAdapter) SupplementalInventoryReadyAt(*monitoring.Monitor, string) (time.Time, bool) {
	return time.Time{}, true
}

// trigger rebuild Fri Jan 16 10:52:41 UTC 2026