package monitoring

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/rcourtman/pulse-go-rewrite/internal/config"
	recoverymanager "github.com/rcourtman/pulse-go-rewrite/internal/recovery/manager"
	"github.com/rcourtman/pulse-go-rewrite/internal/websocket"
	"github.com/rs/zerolog/log"
)

// MultiTenantMonitor manages a dedicated Monitor instance for each organization.
type MultiTenantMonitor struct {
	mu           sync.RWMutex
	monitors     map[string]*Monitor
	tenantCancel map[string]context.CancelFunc
	tenantDone   map[string]chan struct{}
	persistence  *config.MultiTenantPersistence
	baseConfig   *config.Config
	wsHub        *websocket.Hub
	recoveryMgr  *recoverymanager.Manager
	initializer  func(*Monitor)
	globalCtx    context.Context
	globalCancel context.CancelFunc
}

// NewMultiTenantMonitor creates a new multi-tenant monitor manager.
func NewMultiTenantMonitor(baseCfg *config.Config, persistence *config.MultiTenantPersistence, wsHub *websocket.Hub) *MultiTenantMonitor {
	ctx, cancel := context.WithCancel(context.Background())
	return &MultiTenantMonitor{
		monitors:     make(map[string]*Monitor),
		tenantCancel: make(map[string]context.CancelFunc),
		tenantDone:   make(map[string]chan struct{}),
		persistence:  persistence,
		baseConfig:   baseCfg, // Used as a template or for global settings
		wsHub:        wsHub,
		globalCtx:    ctx,
		globalCancel: cancel,
	}
}
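
// A minimal wiring sketch (not part of this file's API): the cfg, persistence,
// hub, and recoveryMgr values below are hypothetical names assumed to come from
// the caller's startup code.
//
//	mtm := monitoring.NewMultiTenantMonitor(cfg, persistence, hub)
//	mtm.SetRecoveryManager(recoveryMgr)
//	defer mtm.Stop()
//	mon, err := mtm.GetMonitor("default")
//	if err != nil {
//		log.Fatal().Err(err).Msg("failed to initialize default tenant monitor")
//	}
//	_ = mon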

// tenantMonitorShutdownTimeout bounds how long Stop and RemoveTenant wait for a
// tenant's monitor loop to exit after its context is cancelled.
const tenantMonitorShutdownTimeout = 2 * time.Second

// SetRecoveryManager wires a recovery store manager into all existing and future tenant monitors.
func (mtm *MultiTenantMonitor) SetRecoveryManager(manager *recoverymanager.Manager) {
	mtm.mu.Lock()
	defer mtm.mu.Unlock()
	mtm.recoveryMgr = manager
	for _, monitor := range mtm.monitors {
		if monitor == nil {
			continue
		}
		monitor.SetRecoveryManager(manager)
	}
}

// SetMonitorInitializer configures a callback that is applied to all existing
// and future tenant monitors after creation.
func (mtm *MultiTenantMonitor) SetMonitorInitializer(initializer func(*Monitor)) {
	if mtm == nil {
		return
	}

	// Snapshot the existing monitors under the lock, then invoke the callback
	// outside it so arbitrary initializer work never runs while holding the mutex.
	mtm.mu.Lock()
	mtm.initializer = initializer
	monitors := make([]*Monitor, 0, len(mtm.monitors))
	for _, monitor := range mtm.monitors {
		if monitor == nil {
			continue
		}
		monitors = append(monitors, monitor)
	}
	mtm.mu.Unlock()

	if initializer == nil {
		return
	}
	for _, monitor := range monitors {
		initializer(monitor)
	}
}
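
// A hedged registration sketch: SetupAlertsAndMetrics is a hypothetical helper,
// shown only to illustrate that the callback runs once per tenant monitor, both
// for monitors that already exist and for ones created later.
//
//	mtm.SetMonitorInitializer(func(m *monitoring.Monitor) {
//		SetupAlertsAndMetrics(m) // hypothetical per-tenant hook
//	})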

// GetMonitor returns the monitor instance for a specific organization.
// It lazily initializes the monitor if it doesn't exist.
func (mtm *MultiTenantMonitor) GetMonitor(orgID string) (*Monitor, error) {
	orgID = strings.TrimSpace(orgID)
	if orgID == "" {
		return nil, fmt.Errorf("organization ID is required")
	}

	mtm.mu.RLock()
	monitor, exists := mtm.monitors[orgID]
	mtm.mu.RUnlock()

	if exists {
		return monitor, nil
	}

	mtm.mu.Lock()
	defer mtm.mu.Unlock()

	if mtm.monitors == nil {
		mtm.monitors = make(map[string]*Monitor)
	}
	if mtm.tenantCancel == nil {
		mtm.tenantCancel = make(map[string]context.CancelFunc)
	}
	if mtm.tenantDone == nil {
		mtm.tenantDone = make(map[string]chan struct{})
	}

	// Double-checked locking: another goroutine may have created the monitor
	// between releasing the read lock and acquiring the write lock.
	if monitor, exists = mtm.monitors[orgID]; exists {
		return monitor, nil
	}

	if mtm.persistence == nil {
		return nil, fmt.Errorf("tenant persistence is not configured")
	}
	if orgID != "default" && !mtm.persistence.OrgExists(orgID) {
		return nil, fmt.Errorf("organization %q is not provisioned", orgID)
	}

	// Initialize new monitor for this tenant
	log.Info().Str("org_id", orgID).Msg("initializing tenant monitor")

	// 1. Load Tenant Config
	// Deep copy the base config to ensure tenant isolation.
	// Each tenant gets its own independent config that won't share
	// credential slices or other mutable state with other tenants.
	tenantConfig := mtm.baseConfig.DeepCopy()

	// Clear inherited credentials - tenants must load their own.
	// This prevents credential leakage between tenants.
	tenantConfig.PVEInstances = nil
	tenantConfig.PBSInstances = nil
	tenantConfig.PMGInstances = nil

	// Ensure the DataPath is correct for this tenant to isolate storage (sqlite, etc.)
	tenantPersistence, err := mtm.persistence.GetPersistence(orgID)
	if err != nil {
		return nil, fmt.Errorf("failed to get persistence for org %s: %w", orgID, err)
	}
	tenantConfig.DataPath = tenantPersistence.GetConfigDir()

	// Load tenant-specific nodes from <orgDir>/nodes.enc
	nodesConfig, err := tenantPersistence.LoadNodesConfig()
	if err != nil {
		log.Warn().Err(err).Str("org_id", orgID).Msg("failed to load tenant nodes config, starting with empty config")
		// Not a fatal error - tenant may not have configured any nodes yet
	} else if nodesConfig != nil {
		tenantConfig.PVEInstances = nodesConfig.PVEInstances
		tenantConfig.PBSInstances = nodesConfig.PBSInstances
		tenantConfig.PMGInstances = nodesConfig.PMGInstances
		log.Info().
			Str("org_id", orgID).
			Int("pve_count", len(nodesConfig.PVEInstances)).
			Int("pbs_count", len(nodesConfig.PBSInstances)).
			Int("pmg_count", len(nodesConfig.PMGInstances)).
			Msg("Loaded tenant nodes config")
	}

	// 2. Create Monitor using the package's internal New constructor.
	monitor, err = New(tenantConfig)
	if err != nil {
		return nil, fmt.Errorf("failed to create monitor for org %s: %w", orgID, err)
	}

	// Set org ID for tenant isolation.
	// This enables tenant-scoped WebSocket broadcasts.
	monitor.SetOrgID(orgID)
	if mtm.recoveryMgr != nil {
		monitor.SetRecoveryManager(mtm.recoveryMgr)
	}
	if mtm.initializer != nil {
		mtm.initializer(monitor)
	}

	// 3. Start Monitor with a tenant-scoped runtime so RemoveTenant can stop it cleanly.
	tenantCtx, tenantCancel := context.WithCancel(mtm.globalCtx)
	tenantDone := make(chan struct{})
	go func() {
		defer close(tenantDone)
		monitor.Start(tenantCtx, mtm.wsHub)
	}()

	mtm.monitors[orgID] = monitor
	mtm.tenantCancel[orgID] = tenantCancel
	mtm.tenantDone[orgID] = tenantDone
	return monitor, nil
}
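
// A per-request resolution sketch: orgIDFromRequest is a hypothetical helper
// that extracts the tenant from the incoming HTTP request; everything else uses
// only the methods defined in this file.
//
//	func handleState(mtm *monitoring.MultiTenantMonitor) http.HandlerFunc {
//		return func(w http.ResponseWriter, r *http.Request) {
//			mon, err := mtm.GetMonitor(orgIDFromRequest(r)) // lazily provisions on first use
//			if err != nil {
//				http.Error(w, err.Error(), http.StatusNotFound)
//				return
//			}
//			_ = mon // query state, metrics, etc.
//		}
//	}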

// PeekMonitor returns the tenant monitor instance if it is already initialized.
// It does not create a new monitor.
func (mtm *MultiTenantMonitor) PeekMonitor(orgID string) (*Monitor, bool) {
	orgID = strings.TrimSpace(orgID)
	if orgID == "" {
		return nil, false
	}

	mtm.mu.RLock()
	defer mtm.mu.RUnlock()
	monitor, exists := mtm.monitors[orgID]
	return monitor, exists
}
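
// PeekMonitor is the read-only counterpart to GetMonitor: callers that must not
// trigger lazy initialization can use it and treat a false result as "tenant not
// currently active". A minimal sketch:
//
//	if mon, ok := mtm.PeekMonitor(orgID); ok {
//		_ = mon // inspect without provisioning anything
//	}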

// Stop stops all tenant monitors.
func (mtm *MultiTenantMonitor) Stop() {
	log.Info().Msg("stopping MultiTenantMonitor and all tenant instances")

	// Snapshot every monitor and its shutdown plumbing under the lock, then
	// release the lock so the blocking cancel/wait work below happens without
	// holding the mutex.
	mtm.mu.Lock()
	mtm.globalCancel()

	monitors := make([]*Monitor, 0, len(mtm.monitors))
	cancels := make([]context.CancelFunc, 0, len(mtm.tenantCancel))
	doneSignals := make([]chan struct{}, 0, len(mtm.tenantDone))
	for orgID, monitor := range mtm.monitors {
		if cancel := mtm.tenantCancel[orgID]; cancel != nil {
			cancels = append(cancels, cancel)
		}
		if done := mtm.tenantDone[orgID]; done != nil {
			doneSignals = append(doneSignals, done)
		}
		monitors = append(monitors, monitor)
	}
	mtm.tenantCancel = make(map[string]context.CancelFunc)
	mtm.tenantDone = make(map[string]chan struct{})
	mtm.monitors = make(map[string]*Monitor)
	mtm.mu.Unlock()

	// Cancel every tenant context, wait (bounded) for each loop to exit, then
	// stop the monitors themselves.
	for _, cancel := range cancels {
		cancel()
	}
	for _, done := range doneSignals {
		waitForTenantMonitorShutdown("", done)
	}
	for _, monitor := range monitors {
		monitor.Stop()
	}
}

// RemoveTenant stops and removes a specific tenant's monitor.
// Useful for offboarding or manual reloading.
func (mtm *MultiTenantMonitor) RemoveTenant(orgID string) {
	orgID = strings.TrimSpace(orgID)
	if orgID == "" {
		return
	}

	mtm.mu.Lock()
	monitor, exists := mtm.monitors[orgID]
	cancel := mtm.tenantCancel[orgID]
	done := mtm.tenantDone[orgID]
	delete(mtm.monitors, orgID)
	delete(mtm.tenantCancel, orgID)
	delete(mtm.tenantDone, orgID)
	mtm.mu.Unlock()

	if !exists {
		return
	}

	log.Info().Str("org_id", orgID).Msg("stopping and removing tenant monitor")
	if cancel != nil {
		cancel()
	}
	waitForTenantMonitorShutdown(orgID, done)
	monitor.Stop()
}
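
// Removal is not permanent: because GetMonitor initializes lazily, a later call
// for the same org rebuilds the monitor from that tenant's persisted nodes
// config. A minimal reload sketch (orgID is assumed to name a provisioned tenant):
//
//	mtm.RemoveTenant(orgID)
//	mon, err := mtm.GetMonitor(orgID) // re-created from <orgDir>/nodes.enc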

// waitForTenantMonitorShutdown blocks until done is closed or
// tenantMonitorShutdownTimeout elapses, logging a warning on timeout.
func waitForTenantMonitorShutdown(orgID string, done <-chan struct{}) {
	if done == nil {
		return
	}

	timer := time.NewTimer(tenantMonitorShutdownTimeout)
	defer timer.Stop()

	select {
	case <-done:
	case <-timer.C:
		logger := log.Warn().Dur("timeout", tenantMonitorShutdownTimeout)
		if orgID != "" {
			logger = logger.Str("org_id", orgID)
		}
		logger.Msg("timed out waiting for tenant monitor loop to exit")
	}
}

// OrgExists checks if an organization exists (directory exists) without creating it.
func (mtm *MultiTenantMonitor) OrgExists(orgID string) bool {
	orgID = strings.TrimSpace(orgID)
	if orgID == "" {
		return false
	}

	if mtm.persistence == nil {
		return false
	}
	return mtm.persistence.OrgExists(orgID)
}
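
// A guard sketch: callers can use OrgExists to reject unknown tenants before
// GetMonitor attempts lazy initialization. The HTTP handling shown here is an
// assumption about the caller, not behavior defined in this file.
//
//	if orgID != "default" && !mtm.OrgExists(orgID) {
//		http.Error(w, "unknown organization", http.StatusNotFound)
//		return
//	}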