Pulse/internal/monitoring/poller.go
rcourtman 7d422d2909 feat: add professional logging with runtime configuration and performance optimization
Implements structured logging package with LOG_LEVEL/LOG_FORMAT env support, debug level guards for hot paths, enriched error messages with actionable context, and stack trace capture for production debugging. Improves observability and reduces log overhead in high-frequency polling loops.
2025-10-20 15:13:38 +00:00

285 lines
7.2 KiB
Go

package monitoring
import (
"context"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/errors"
"github.com/rcourtman/pulse-go-rewrite/internal/logging"
"github.com/rcourtman/pulse-go-rewrite/pkg/pbs"
"github.com/rcourtman/pulse-go-rewrite/pkg/pmg"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
// PollResult describes the outcome of one executed polling task: which
// instance it targeted, whether it was dispatched successfully, and the
// wall-clock window in which it ran.
type PollResult struct {
	// InstanceName is the name of the instance the task was aimed at.
	InstanceName string
	// InstanceType is one of "pve", "pbs", or "pmg".
	InstanceType string
	// Success is false when the task could not be dispatched.
	Success bool
	// Error carries the reason when Success is false; nil otherwise.
	Error error
	// StartTime is taken just before the task is dispatched.
	StartTime time.Time
	// EndTime is taken right after the task returns.
	EndTime time.Time
}
// PollTask is one unit of work for the poller pool. InstanceType selects
// which of the three client fields is used; the other two are left unset.
type PollTask struct {
	// InstanceName is the name of the instance to poll.
	InstanceName string
	// InstanceType is one of "pve", "pbs", or "pmg".
	InstanceType string
	// PVEClient is used when InstanceType is "pve".
	PVEClient PVEClientInterface
	// PBSClient is used when InstanceType is "pbs".
	PBSClient *pbs.Client
	// PMGClient is used when InstanceType is "pmg".
	PMGClient *pmg.Client
}
// PollerPool runs polling tasks concurrently on a fixed number of worker
// goroutines that communicate over channels.
type PollerPool struct {
	// workers is the number of worker goroutines launched by Start.
	workers int
	// tasksChan queues tasks for the workers; Close closes it.
	tasksChan chan PollTask
	// resultsChan carries finished results to the collector goroutine.
	resultsChan chan PollResult
	// monitor performs the actual per-instance polling.
	monitor *Monitor
	// done is closed by Close as a shutdown signal.
	done chan struct{}
	// closed records that Close already ran so a second call is a no-op.
	// It is not synchronized, so Close must not be called concurrently.
	closed bool
}
// NewPollerPool builds a pool with the given number of workers that polls
// through monitor. Both the task queue and the result queue are buffered to
// twice the worker count. The constructor only allocates; no goroutine runs
// until Start is called.
//
// A workers value below 1 is raised to 1. A negative value would otherwise
// make the channel allocation panic, and zero would produce a pool that can
// never accept or process a task.
func NewPollerPool(workers int, monitor *Monitor) *PollerPool {
	if workers < 1 {
		workers = 1
	}

	// Twice the worker count lets producers stay ahead of the workers.
	queueSize := workers * 2

	return &PollerPool{
		workers:     workers,
		tasksChan:   make(chan PollTask, queueSize),
		resultsChan: make(chan PollResult, queueSize),
		monitor:     monitor,
		done:        make(chan struct{}),
		closed:      false,
	}
}
// Start launches p.workers worker goroutines plus one result-collector
// goroutine. All of them stop once ctx is cancelled.
func (p *PollerPool) Start(ctx context.Context) {
	// Workers are numbered from zero; the id only appears in debug logs.
	for workerID := 0; workerID < p.workers; workerID++ {
		go p.worker(ctx, workerID)
	}

	// A single collector drains and logs every result.
	go p.collectResults(ctx)
}
// worker is the loop run by each pool goroutine. It takes tasks from
// tasksChan, executes them, and forwards each result to resultsChan. It
// returns when ctx is cancelled or when tasksChan is closed.
//
// id only labels debug log lines.
func (p *PollerPool) worker(ctx context.Context, id int) {
	// debugLog re-checks the level on every call so that a runtime change of
	// the log level takes effect, and skips building the event when debug
	// logging is off.
	debugLog := func(msg string) {
		if logging.IsLevelEnabled(zerolog.DebugLevel) {
			log.Debug().Int("worker", id).Msg(msg)
		}
	}

	debugLog("Poller worker started")

	for {
		select {
		case <-ctx.Done():
			debugLog("Poller worker stopped")
			return

		case task, ok := <-p.tasksChan:
			if !ok {
				debugLog("Task channel closed, worker stopping")
				return
			}

			result := p.executeTask(ctx, task)

			// If the cycle ended while the task ran, nobody is collecting
			// results any more, so stop without delivering.
			if ctx.Err() != nil {
				return
			}

			// Deliver the result, waiting for buffer space if necessary.
			// Cancellation is the only way out of the wait. A non-blocking
			// send here would silently drop results whenever the buffer is
			// full, and it would not guard against a closed channel anyway:
			// sending on a closed channel panics even inside a select.
			select {
			case p.resultsChan <- result:
			case <-ctx.Done():
				return
			}
		}
	}
}
// executeTask runs one task by dispatching on task.InstanceType to the
// matching monitor poll method, and returns a PollResult stamped with the
// start and end times.
//
// The result is marked failed only when the client for the requested type is
// nil or the type is not recognised. The poll methods return nothing, so any
// error they hit is not reflected in the result.
func (p *PollerPool) executeTask(ctx context.Context, task PollTask) PollResult {
	outcome := PollResult{
		InstanceName: task.InstanceName,
		InstanceType: task.InstanceType,
		StartTime:    time.Now(),
		Success:      true,
	}

	// fail marks the outcome as unsuccessful with the given cause.
	fail := func(cause error) {
		outcome.Success = false
		outcome.Error = cause
	}

	switch task.InstanceType {
	case "pve":
		if task.PVEClient == nil {
			fail(errors.NewMonitorError(errors.ErrorTypeInternal, "poll_pve", task.InstanceName, errors.ErrInvalidInput))
			break
		}
		p.monitor.pollPVEInstance(ctx, task.InstanceName, task.PVEClient)

	case "pbs":
		if task.PBSClient == nil {
			fail(errors.NewMonitorError(errors.ErrorTypeInternal, "poll_pbs", task.InstanceName, errors.ErrInvalidInput))
			break
		}
		p.monitor.pollPBSInstance(ctx, task.InstanceName, task.PBSClient)

	case "pmg":
		if task.PMGClient == nil {
			fail(errors.NewMonitorError(errors.ErrorTypeInternal, "poll_pmg", task.InstanceName, errors.ErrInvalidInput))
			break
		}
		p.monitor.pollPMGInstance(ctx, task.InstanceName, task.PMGClient)

	default:
		// Unknown instance type: reported as a validation error.
		fail(errors.NewMonitorError(errors.ErrorTypeValidation, "poll_unknown", task.InstanceName, errors.ErrInvalidInput))
	}

	outcome.EndTime = time.Now()
	return outcome
}
// collectResults drains resultsChan and logs one line per result: debug
// level for successes, error level for failures. It returns when ctx is
// cancelled or resultsChan is closed.
func (p *PollerPool) collectResults(ctx context.Context) {
	for {
		var res PollResult

		// Wait for the next result or for shutdown.
		select {
		case <-ctx.Done():
			return
		case received, open := <-p.resultsChan:
			if !open {
				return
			}
			res = received
		}

		elapsed := res.EndTime.Sub(res.StartTime)

		if !res.Success {
			log.Error().
				Err(res.Error).
				Str("instance", res.InstanceName).
				Str("type", res.InstanceType).
				Dur("duration", elapsed).
				Msg("Polling failed; request will be retried on next cycle")
			continue
		}

		log.Debug().
			Str("instance", res.InstanceName).
			Str("type", res.InstanceType).
			Dur("duration", elapsed).
			Msg("Polling completed successfully")
	}
}
// SubmitTask tries to enqueue task without blocking.
//
// It returns nil when the task was queued, ctx.Err() when ctx is already
// cancelled, and a monitor error wrapping errors.ErrTimeout when the task
// queue has no free slot.
func (p *PollerPool) SubmitTask(ctx context.Context, task PollTask) error {
	var submitErr error

	select {
	case p.tasksChan <- task:
		// Queued; submitErr stays nil.
	case <-ctx.Done():
		submitErr = ctx.Err()
	default:
		// Queue is full and the context is still live.
		submitErr = errors.NewMonitorError(errors.ErrorTypeInternal, "submit_task", task.InstanceName, errors.ErrTimeout)
	}

	return submitErr
}
// Close shuts the pool down by closing the done channel and the task queue;
// workers stop once they observe the closed task queue. Calling Close again
// afterwards is a no-op.
//
// resultsChan is deliberately left open: the collector goroutine exits when
// its context is cancelled.
func (p *PollerPool) Close() {
	if !p.closed {
		p.closed = true

		// Broadcast shutdown, then stop feeding the workers.
		close(p.done)
		close(p.tasksChan)
	}
}
// pollWithChannels runs one polling cycle through a PollerPool. It sizes the
// pool from the number of configured PVE, PBS and PMG clients (clamped to
// 2..10 workers), submits one task per client, then blocks until the cycle
// context ends before closing the pool.
//
// The function always waits for the cycle context to finish, either through
// its timeout or through cancellation of ctx. It does not return early when
// every task has already completed.
func (m *Monitor) pollWithChannels(ctx context.Context) {
	const (
		minWorkers = 2
		maxWorkers = 10
		// 10s minus 200ms; intended to line up with the polling interval
		// (the interval itself is not defined in this function).
		cycleTimeout = 10*time.Second - 200*time.Millisecond
	)

	// One worker per configured instance, within the allowed range.
	workerCount := len(m.pveClients) + len(m.pbsClients) + len(m.pmgClients)
	switch {
	case workerCount > maxWorkers:
		workerCount = maxWorkers
	case workerCount < minWorkers:
		workerCount = minWorkers
	}

	pool := NewPollerPool(workerCount, m)

	// Everything in this cycle is bounded by pollCtx.
	pollCtx, cancel := context.WithTimeout(ctx, cycleTimeout)
	defer cancel()

	pool.Start(pollCtx)

	// enqueue submits one task and either counts it or logs why it was
	// rejected, using the caller-supplied message.
	submitted := 0
	enqueue := func(task PollTask, failureMsg string) {
		if err := pool.SubmitTask(pollCtx, task); err != nil {
			log.Error().Err(err).Str("instance", task.InstanceName).Msg(failureMsg)
			return
		}
		submitted++
	}

	for name, client := range m.pveClients {
		enqueue(PollTask{
			InstanceName: name,
			InstanceType: "pve",
			PVEClient:    client,
		}, "Failed to submit PVE polling task")
	}

	for name, client := range m.pbsClients {
		enqueue(PollTask{
			InstanceName: name,
			InstanceType: "pbs",
			PBSClient:    client,
		}, "Failed to submit PBS polling task")
	}

	for name, client := range m.pmgClients {
		enqueue(PollTask{
			InstanceName: name,
			InstanceType: "pmg",
			PMGClient:    client,
		}, "Failed to submit PMG polling task")
	}

	// Block until the cycle's deadline passes or the parent is cancelled.
	<-pollCtx.Done()

	pool.Close()
	log.Debug().Int("tasks", submitted).Msg("Channel-based polling cycle completed")
}