mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 19:41:17 +00:00
Implements structured logging package with LOG_LEVEL/LOG_FORMAT env support, debug level guards for hot paths, enriched error messages with actionable context, and stack trace capture for production debugging. Improves observability and reduces log overhead in high-frequency polling loops.
285 lines
7.2 KiB
Go
285 lines
7.2 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/errors"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/logging"
|
|
"github.com/rcourtman/pulse-go-rewrite/pkg/pbs"
|
|
"github.com/rcourtman/pulse-go-rewrite/pkg/pmg"
|
|
"github.com/rs/zerolog"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// PollResult is the outcome record produced for one executed PollTask.
//
// It carries the identity of the polled instance, whether the task was
// accepted for execution, and the wall-clock window the execution took.
type PollResult struct {
	// InstanceName identifies the instance the task targeted.
	InstanceName string
	// InstanceType is the kind of instance: "pve", "pbs", or "pmg".
	InstanceType string
	// Success reports whether the task could be dispatched to a poller.
	Success bool
	// Error holds the reason when Success is false.
	Error error
	// StartTime is taken just before the task is dispatched.
	StartTime time.Time
	// EndTime is taken right after the dispatch returns.
	EndTime time.Time
}
|
|
|
|
// PollTask represents a polling task to be executed
|
|
type PollTask struct {
|
|
InstanceName string
|
|
InstanceType string // "pve", "pbs", or "pmg"
|
|
PVEClient PVEClientInterface
|
|
PBSClient *pbs.Client
|
|
PMGClient *pmg.Client
|
|
}
|
|
|
|
// PollerPool manages concurrent polling with channels
|
|
type PollerPool struct {
|
|
workers int
|
|
tasksChan chan PollTask
|
|
resultsChan chan PollResult
|
|
monitor *Monitor
|
|
done chan struct{}
|
|
closed bool
|
|
}
|
|
|
|
// NewPollerPool creates a new poller pool
|
|
func NewPollerPool(workers int, monitor *Monitor) *PollerPool {
|
|
return &PollerPool{
|
|
workers: workers,
|
|
tasksChan: make(chan PollTask, workers*2), // Buffer for smooth operation
|
|
resultsChan: make(chan PollResult, workers*2),
|
|
monitor: monitor,
|
|
done: make(chan struct{}),
|
|
closed: false,
|
|
}
|
|
}
|
|
|
|
// Start starts the worker pool
|
|
func (p *PollerPool) Start(ctx context.Context) {
|
|
// Start workers
|
|
for i := 0; i < p.workers; i++ {
|
|
go p.worker(ctx, i)
|
|
}
|
|
|
|
// Start result collector
|
|
go p.collectResults(ctx)
|
|
}
|
|
|
|
// worker processes polling tasks
|
|
func (p *PollerPool) worker(ctx context.Context, id int) {
|
|
if logging.IsLevelEnabled(zerolog.DebugLevel) {
|
|
log.Debug().Int("worker", id).Msg("Poller worker started")
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
if logging.IsLevelEnabled(zerolog.DebugLevel) {
|
|
log.Debug().Int("worker", id).Msg("Poller worker stopped")
|
|
}
|
|
return
|
|
case task, ok := <-p.tasksChan:
|
|
if !ok {
|
|
if logging.IsLevelEnabled(zerolog.DebugLevel) {
|
|
log.Debug().Int("worker", id).Msg("Task channel closed, worker stopping")
|
|
}
|
|
return
|
|
}
|
|
|
|
result := p.executeTask(ctx, task)
|
|
|
|
// Send result if context is still active and channel is open
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
// Use non-blocking send to avoid panic if channel is closed
|
|
select {
|
|
case p.resultsChan <- result:
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
// Channel might be closed, just continue
|
|
if logging.IsLevelEnabled(zerolog.DebugLevel) {
|
|
log.Debug().Int("worker", id).Msg("Results channel appears closed, skipping result")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// executeTask executes a single polling task
|
|
func (p *PollerPool) executeTask(ctx context.Context, task PollTask) PollResult {
|
|
result := PollResult{
|
|
InstanceName: task.InstanceName,
|
|
InstanceType: task.InstanceType,
|
|
StartTime: time.Now(),
|
|
Success: true,
|
|
}
|
|
|
|
switch task.InstanceType {
|
|
case "pve":
|
|
if task.PVEClient != nil {
|
|
p.monitor.pollPVEInstance(ctx, task.InstanceName, task.PVEClient)
|
|
} else {
|
|
result.Success = false
|
|
result.Error = errors.NewMonitorError(errors.ErrorTypeInternal, "poll_pve", task.InstanceName, errors.ErrInvalidInput)
|
|
}
|
|
case "pbs":
|
|
if task.PBSClient != nil {
|
|
p.monitor.pollPBSInstance(ctx, task.InstanceName, task.PBSClient)
|
|
} else {
|
|
result.Success = false
|
|
result.Error = errors.NewMonitorError(errors.ErrorTypeInternal, "poll_pbs", task.InstanceName, errors.ErrInvalidInput)
|
|
}
|
|
case "pmg":
|
|
if task.PMGClient != nil {
|
|
p.monitor.pollPMGInstance(ctx, task.InstanceName, task.PMGClient)
|
|
} else {
|
|
result.Success = false
|
|
result.Error = errors.NewMonitorError(errors.ErrorTypeInternal, "poll_pmg", task.InstanceName, errors.ErrInvalidInput)
|
|
}
|
|
default:
|
|
result.Success = false
|
|
result.Error = errors.NewMonitorError(errors.ErrorTypeValidation, "poll_unknown", task.InstanceName, errors.ErrInvalidInput)
|
|
}
|
|
|
|
result.EndTime = time.Now()
|
|
return result
|
|
}
|
|
|
|
// collectResults collects polling results
|
|
func (p *PollerPool) collectResults(ctx context.Context) {
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case result, ok := <-p.resultsChan:
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
duration := result.EndTime.Sub(result.StartTime)
|
|
if result.Success {
|
|
log.Debug().
|
|
Str("instance", result.InstanceName).
|
|
Str("type", result.InstanceType).
|
|
Dur("duration", duration).
|
|
Msg("Polling completed successfully")
|
|
} else {
|
|
log.Error().
|
|
Err(result.Error).
|
|
Str("instance", result.InstanceName).
|
|
Str("type", result.InstanceType).
|
|
Dur("duration", duration).
|
|
Msg("Polling failed; request will be retried on next cycle")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// SubmitTask submits a polling task
|
|
func (p *PollerPool) SubmitTask(ctx context.Context, task PollTask) error {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case p.tasksChan <- task:
|
|
return nil
|
|
default:
|
|
// Channel is full
|
|
return errors.NewMonitorError(errors.ErrorTypeInternal, "submit_task", task.InstanceName, errors.ErrTimeout)
|
|
}
|
|
}
|
|
|
|
// Close closes the poller pool
|
|
func (p *PollerPool) Close() {
|
|
if p.closed {
|
|
return
|
|
}
|
|
p.closed = true
|
|
|
|
// Signal shutdown
|
|
close(p.done)
|
|
|
|
// Close task channel to signal workers to stop
|
|
close(p.tasksChan)
|
|
|
|
// Don't close resultsChan here - let it drain naturally
|
|
// The collectors will exit when context is done
|
|
}
|
|
|
|
// pollWithChannels implements channel-based concurrent polling
|
|
func (m *Monitor) pollWithChannels(ctx context.Context) {
|
|
// Create worker pool based on instance count
|
|
workerCount := len(m.pveClients) + len(m.pbsClients) + len(m.pmgClients)
|
|
if workerCount > 10 {
|
|
workerCount = 10 // Cap at 10 workers
|
|
}
|
|
if workerCount < 2 {
|
|
workerCount = 2 // Minimum 2 workers
|
|
}
|
|
|
|
pool := NewPollerPool(workerCount, m)
|
|
|
|
// Create a context with timeout for this polling cycle
|
|
// Hardcoded to 10s minus 200ms (matches polling interval)
|
|
timeout := 10*time.Second - 200*time.Millisecond
|
|
pollCtx, cancel := context.WithTimeout(ctx, timeout)
|
|
defer cancel()
|
|
|
|
// Start the pool
|
|
pool.Start(pollCtx)
|
|
|
|
// Submit all tasks
|
|
var taskCount int
|
|
|
|
// Submit PVE tasks
|
|
for name, client := range m.pveClients {
|
|
task := PollTask{
|
|
InstanceName: name,
|
|
InstanceType: "pve",
|
|
PVEClient: client,
|
|
}
|
|
if err := pool.SubmitTask(pollCtx, task); err != nil {
|
|
log.Error().Err(err).Str("instance", name).Msg("Failed to submit PVE polling task")
|
|
} else {
|
|
taskCount++
|
|
}
|
|
}
|
|
|
|
// Submit PBS tasks
|
|
for name, client := range m.pbsClients {
|
|
task := PollTask{
|
|
InstanceName: name,
|
|
InstanceType: "pbs",
|
|
PBSClient: client,
|
|
}
|
|
if err := pool.SubmitTask(pollCtx, task); err != nil {
|
|
log.Error().Err(err).Str("instance", name).Msg("Failed to submit PBS polling task")
|
|
} else {
|
|
taskCount++
|
|
}
|
|
}
|
|
|
|
// Submit PMG tasks
|
|
for name, client := range m.pmgClients {
|
|
task := PollTask{
|
|
InstanceName: name,
|
|
InstanceType: "pmg",
|
|
PMGClient: client,
|
|
}
|
|
if err := pool.SubmitTask(pollCtx, task); err != nil {
|
|
log.Error().Err(err).Str("instance", name).Msg("Failed to submit PMG polling task")
|
|
} else {
|
|
taskCount++
|
|
}
|
|
}
|
|
|
|
// Wait for all tasks to complete or timeout
|
|
<-pollCtx.Done()
|
|
|
|
// Clean up
|
|
pool.Close()
|
|
|
|
log.Debug().Int("tasks", taskCount).Msg("Channel-based polling cycle completed")
|
|
}
|