package monitoring import ( "context" "math/rand" "sort" "sync" "time" "github.com/rs/zerolog/log" ) // InstanceType represents a polling target category. type InstanceType string const ( InstanceTypePVE InstanceType = "pve" InstanceTypePBS InstanceType = "pbs" InstanceTypePMG InstanceType = "pmg" ) // StalenessSource provides normalized freshness hints for an instance. type StalenessSource interface { StalenessScore(instanceType InstanceType, instanceName string) (float64, bool) } // IntervalSelector chooses the next polling cadence for an instance. type IntervalSelector interface { SelectInterval(req IntervalRequest) time.Duration } // TaskEnqueuer receives scheduled tasks for downstream execution. type TaskEnqueuer interface { Enqueue(ctx context.Context, task ScheduledTask) error } // IntervalRequest bundles the context required to compute the next polling interval. type IntervalRequest struct { Now time.Time BaseInterval time.Duration MinInterval time.Duration MaxInterval time.Duration LastInterval time.Duration LastSuccess time.Time LastScheduled time.Time StalenessScore float64 ErrorCount int QueueDepth int InstanceKey string InstanceType InstanceType } // InstanceDescriptor describes a monitored endpoint for scheduling purposes. type InstanceDescriptor struct { Name string Type InstanceType LastSuccess time.Time LastFailure time.Time LastScheduled time.Time LastInterval time.Duration ErrorCount int Metadata map[string]any } // ScheduledTask represents a single polling opportunity planned by the scheduler. type ScheduledTask struct { InstanceName string InstanceType InstanceType NextRun time.Time Interval time.Duration Priority float64 Metadata map[string]any } // SchedulerConfig contains tunables for the adaptive scheduler. type SchedulerConfig struct { BaseInterval time.Duration MinInterval time.Duration MaxInterval time.Duration } // DefaultSchedulerConfig returns conservative defaults that preserve current behaviour. func DefaultSchedulerConfig() SchedulerConfig { return SchedulerConfig{ BaseInterval: 10 * time.Second, MinInterval: 5 * time.Second, MaxInterval: 5 * time.Minute, } } // AdaptiveScheduler orchestrates poll execution plans using pluggable scoring strategies. type AdaptiveScheduler struct { cfg SchedulerConfig staleness StalenessSource interval IntervalSelector enqueuer TaskEnqueuer mu sync.RWMutex lastPlan map[string]ScheduledTask } // NewAdaptiveScheduler constructs a scheduler with safe defaults. func NewAdaptiveScheduler(cfg SchedulerConfig, staleness StalenessSource, interval IntervalSelector, enqueuer TaskEnqueuer) *AdaptiveScheduler { if cfg.BaseInterval <= 0 { cfg.BaseInterval = DefaultSchedulerConfig().BaseInterval } if cfg.MinInterval <= 0 { cfg.MinInterval = DefaultSchedulerConfig().MinInterval } if cfg.MaxInterval <= 0 || cfg.MaxInterval < cfg.MinInterval { cfg.MaxInterval = DefaultSchedulerConfig().MaxInterval } if staleness == nil { staleness = noopStalenessSource{} } if interval == nil { interval = newAdaptiveIntervalSelector(cfg) } if enqueuer == nil { enqueuer = noopTaskEnqueuer{} } return &AdaptiveScheduler{ cfg: cfg, staleness: staleness, interval: interval, enqueuer: enqueuer, lastPlan: make(map[string]ScheduledTask), } } // BuildPlan produces an ordered set of scheduled tasks for the supplied inventory. func (s *AdaptiveScheduler) BuildPlan(now time.Time, inventory []InstanceDescriptor, queueDepth int) []ScheduledTask { if len(inventory) == 0 { return nil } s.mu.Lock() defer s.mu.Unlock() tasks := make([]ScheduledTask, 0, len(inventory)) for _, inst := range inventory { score, ok := s.staleness.StalenessScore(inst.Type, inst.Name) if !ok { score = 0 } lastScheduled := inst.LastScheduled lastInterval := inst.LastInterval if cached, exists := s.lastPlan[schedulerKey(inst.Type, inst.Name)]; exists { if lastScheduled.IsZero() { lastScheduled = cached.NextRun } if lastInterval == 0 { lastInterval = cached.Interval } } if lastInterval == 0 { lastInterval = s.cfg.BaseInterval } currentDepth := queueDepth + len(tasks) req := IntervalRequest{ Now: now, BaseInterval: s.cfg.BaseInterval, MinInterval: s.cfg.MinInterval, MaxInterval: s.cfg.MaxInterval, LastInterval: lastInterval, LastSuccess: inst.LastSuccess, LastScheduled: lastScheduled, StalenessScore: score, ErrorCount: inst.ErrorCount, QueueDepth: currentDepth, InstanceKey: schedulerKey(inst.Type, inst.Name), InstanceType: inst.Type, } nextInterval := s.interval.SelectInterval(req) if nextInterval <= 0 { nextInterval = s.cfg.BaseInterval } if nextInterval < s.cfg.MinInterval { nextInterval = s.cfg.MinInterval } if nextInterval > s.cfg.MaxInterval { nextInterval = s.cfg.MaxInterval } nextRun := now if !lastScheduled.IsZero() { nextRun = lastScheduled.Add(nextInterval) } else if !inst.LastSuccess.IsZero() { nextRun = inst.LastSuccess.Add(nextInterval) } if nextRun.Before(now) { nextRun = now } task := ScheduledTask{ InstanceName: inst.Name, InstanceType: inst.Type, NextRun: nextRun, Interval: nextInterval, Priority: score, Metadata: inst.Metadata, } s.lastPlan[schedulerKey(inst.Type, inst.Name)] = task tasks = append(tasks, task) } sort.Slice(tasks, func(i, j int) bool { if tasks[i].NextRun.Equal(tasks[j].NextRun) { if tasks[i].Priority == tasks[j].Priority { return tasks[i].InstanceName < tasks[j].InstanceName } return tasks[i].Priority > tasks[j].Priority } return tasks[i].NextRun.Before(tasks[j].NextRun) }) return tasks } // FilterDue returns tasks whose NextRun is at or before now. func (s *AdaptiveScheduler) FilterDue(now time.Time, tasks []ScheduledTask) []ScheduledTask { if len(tasks) == 0 { return nil } due := make([]ScheduledTask, 0, len(tasks)) for _, task := range tasks { if !task.NextRun.After(now) { due = append(due, task) } } return due } // DispatchDue enqueues due tasks using the configured sink for tracking purposes. func (s *AdaptiveScheduler) DispatchDue(ctx context.Context, now time.Time, tasks []ScheduledTask) []ScheduledTask { if s == nil { return tasks } due := s.FilterDue(now, tasks) if len(due) == 0 { return due } for _, task := range due { if err := s.enqueuer.Enqueue(ctx, task); err != nil { log.Warn(). Err(err). Str("instance", task.InstanceName). Str("type", string(task.InstanceType)). Msg("Failed to enqueue scheduled task") } } return due } // LastScheduled returns the last recorded task for the given instance, if any. func (s *AdaptiveScheduler) LastScheduled(instanceType InstanceType, instanceName string) (ScheduledTask, bool) { if s == nil { return ScheduledTask{}, false } s.mu.RLock() defer s.mu.RUnlock() task, ok := s.lastPlan[schedulerKey(instanceType, instanceName)] return task, ok } type noopStalenessSource struct{} func (noopStalenessSource) StalenessScore(instanceType InstanceType, instanceName string) (float64, bool) { return 0, false } type fixedIntervalSelector struct { interval time.Duration } func (f *fixedIntervalSelector) SelectInterval(req IntervalRequest) time.Duration { if f.interval > 0 { return f.interval } return req.BaseInterval } type adaptiveIntervalSelector struct { mu sync.Mutex state map[string]time.Duration rng *rand.Rand alpha float64 jitterFraction float64 queueStretch float64 errorPenalty float64 } func newAdaptiveIntervalSelector(cfg SchedulerConfig) *adaptiveIntervalSelector { return &adaptiveIntervalSelector{ state: make(map[string]time.Duration), rng: rand.New(rand.NewSource(time.Now().UnixNano())), alpha: 0.6, jitterFraction: 0.05, queueStretch: 0.1, errorPenalty: 0.6, } } func (a *adaptiveIntervalSelector) SelectInterval(req IntervalRequest) time.Duration { min := req.MinInterval max := req.MaxInterval if max <= 0 || max < min { max = min } score := clampFloat(req.StalenessScore, 0, 1) span := float64(max - min) target := time.Duration(float64(min) + span*(1-score)) if target < min { target = min } if target > max { target = max } if req.ErrorCount > 0 { penalty := 1 + a.errorPenalty*float64(req.ErrorCount) if penalty > 0 { target = time.Duration(float64(target) / penalty) if target < min { target = min } } } if req.QueueDepth > 1 { stretch := 1 + a.queueStretch*float64(req.QueueDepth-1) target = time.Duration(float64(target) * stretch) if target > max { target = max } } base := req.LastInterval if base <= 0 { base = req.BaseInterval } var smoothed time.Duration key := req.InstanceKey if key == "" { key = string(req.InstanceType) } a.mu.Lock() prev, ok := a.state[key] if ok { base = prev } smoothed = time.Duration(a.alpha*float64(target) + (1-a.alpha)*float64(base)) if smoothed < min { smoothed = min } if smoothed > max { smoothed = max } a.state[key] = smoothed var jitter float64 if a.jitterFraction > 0 && smoothed > 0 { jitter = (a.rng.Float64()*2 - 1) * a.jitterFraction } a.mu.Unlock() if jitter != 0 { smoothed = time.Duration(float64(smoothed) * (1 + jitter)) } if smoothed < min { smoothed = min } if smoothed > max { smoothed = max } return smoothed } func clampFloat(v, min, max float64) float64 { if v < min { return min } if v > max { return max } return v } type noopTaskEnqueuer struct{} func (noopTaskEnqueuer) Enqueue(ctx context.Context, task ScheduledTask) error { return nil }