test: Add comprehensive tests for guest metadata functions

Tests for rate limiting, retry, and slot management:
- retryGuestAgentCall: timeout retry, non-timeout break, context cancellation
- acquireGuestMetadataSlot: nil safety, slot acquisition, context cancellation
- releaseGuestMetadataSlot: nil safety, non-blocking release
- tryReserveGuestMetadataFetch: reservation logic, default hold duration
- scheduleNextGuestMetadataFetch: interval, jitter, RNG handling
- deferGuestMetadataRetry: backoff logic, defaults

Covers error handling and edge cases in guest agent metadata fetching.
This commit is contained in:
rcourtman 2025-12-01 21:10:28 +00:00
parent b3b8081426
commit ddfe2dc2c9

View file

@@ -1,7 +1,11 @@
package monitoring
import (
"context"
"errors"
"math/rand"
"testing"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/models"
"github.com/rcourtman/pulse-go-rewrite/pkg/proxmox"
@@ -543,3 +547,733 @@ func TestProcessGuestNetworkInterfaces(t *testing.T) {
})
}
}
// TestRetryGuestAgentCall verifies the retry policy of
// Monitor.retryGuestAgentCall: timeout-style errors are retried up to
// maxRetries times with a delay between attempts, non-timeout errors fail
// immediately, and a cancelled context aborts the loop during the delay.
func TestRetryGuestAgentCall(t *testing.T) {
	t.Parallel()

	t.Run("successful call on first attempt", func(t *testing.T) {
		t.Parallel()
		m := &Monitor{}
		callCount := 0
		fn := func(ctx context.Context) (interface{}, error) {
			callCount++
			return "success", nil
		}
		ctx := context.Background()
		result, err := m.retryGuestAgentCall(ctx, 50*time.Millisecond, 2, fn)
		if err != nil {
			t.Fatalf("expected no error, got %v", err)
		}
		if result != "success" {
			t.Fatalf("expected result 'success', got %v", result)
		}
		if callCount != 1 {
			t.Fatalf("expected 1 call, got %d", callCount)
		}
	})

	t.Run("timeout error triggers retry and eventually succeeds", func(t *testing.T) {
		t.Parallel()
		m := &Monitor{}
		callCount := 0
		fn := func(ctx context.Context) (interface{}, error) {
			callCount++
			if callCount < 3 {
				// Message contains "timeout" so the helper treats it as retryable.
				return nil, errors.New("context deadline exceeded (timeout)")
			}
			return "success after retries", nil
		}
		ctx := context.Background()
		result, err := m.retryGuestAgentCall(ctx, 50*time.Millisecond, 3, fn)
		if err != nil {
			t.Fatalf("expected no error, got %v", err)
		}
		if result != "success after retries" {
			t.Fatalf("expected result 'success after retries', got %v", result)
		}
		if callCount != 3 {
			t.Fatalf("expected 3 calls, got %d", callCount)
		}
	})

	t.Run("non-timeout error does not trigger retry", func(t *testing.T) {
		t.Parallel()
		m := &Monitor{}
		callCount := 0
		nonTimeoutErr := errors.New("connection refused")
		fn := func(ctx context.Context) (interface{}, error) {
			callCount++
			return nil, nonTimeoutErr
		}
		ctx := context.Background()
		result, err := m.retryGuestAgentCall(ctx, 50*time.Millisecond, 3, fn)
		if err == nil {
			t.Fatal("expected error, got nil")
		}
		if err.Error() != nonTimeoutErr.Error() {
			t.Fatalf("expected error %q, got %q", nonTimeoutErr.Error(), err.Error())
		}
		if result != nil {
			t.Fatalf("expected nil result, got %v", result)
		}
		if callCount != 1 {
			t.Fatalf("expected 1 call (no retry for non-timeout), got %d", callCount)
		}
	})

	t.Run("all retries exhausted returns last error", func(t *testing.T) {
		t.Parallel()
		m := &Monitor{}
		callCount := 0
		fn := func(ctx context.Context) (interface{}, error) {
			callCount++
			return nil, errors.New("persistent timeout error")
		}
		ctx := context.Background()
		result, err := m.retryGuestAgentCall(ctx, 50*time.Millisecond, 2, fn)
		if err == nil {
			t.Fatal("expected error, got nil")
		}
		if err.Error() != "persistent timeout error" {
			t.Fatalf("expected 'persistent timeout error', got %q", err.Error())
		}
		if result != nil {
			t.Fatalf("expected nil result, got %v", result)
		}
		// maxRetries=2 means: attempt 0 (initial) + attempts 1,2 (retries) = 3 calls
		if callCount != 3 {
			t.Fatalf("expected 3 calls (1 initial + 2 retries), got %d", callCount)
		}
	})

	t.Run("context cancellation during retry delay aborts early", func(t *testing.T) {
		t.Parallel()
		m := &Monitor{}
		callCount := 0
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()
		fn := func(ctx context.Context) (interface{}, error) {
			callCount++
			// Cancel from inside the call so the context is guaranteed to be
			// cancelled by the time the retry delay starts. The previous
			// approach (a goroutine cancelling after a 10ms sleep) raced the
			// retry loop and could flake on a heavily loaded machine.
			cancel()
			return nil, errors.New("timeout error")
		}
		result, err := m.retryGuestAgentCall(ctx, 50*time.Millisecond, 5, fn)
		if err == nil {
			t.Fatal("expected error, got nil")
		}
		if !errors.Is(err, context.Canceled) {
			t.Fatalf("expected context.Canceled error, got %v", err)
		}
		if result != nil {
			t.Fatalf("expected nil result, got %v", result)
		}
		// Exactly one call happens before the cancelled context stops the retry.
		if callCount != 1 {
			t.Fatalf("expected 1 call before cancellation, got %d", callCount)
		}
	})

	t.Run("zero retries means single attempt", func(t *testing.T) {
		t.Parallel()
		m := &Monitor{}
		callCount := 0
		fn := func(ctx context.Context) (interface{}, error) {
			callCount++
			return nil, errors.New("timeout error")
		}
		ctx := context.Background()
		result, err := m.retryGuestAgentCall(ctx, 50*time.Millisecond, 0, fn)
		if err == nil {
			t.Fatal("expected error, got nil")
		}
		if result != nil {
			t.Fatalf("expected nil result, got %v", result)
		}
		if callCount != 1 {
			t.Fatalf("expected 1 call with maxRetries=0, got %d", callCount)
		}
	})
}
// TestAcquireGuestMetadataSlot exercises Monitor.acquireGuestMetadataSlot:
// nil receivers and nil channels are permissive, acquisition consumes channel
// capacity, and a cancelled context fails when no slot is free.
func TestAcquireGuestMetadataSlot(t *testing.T) {
	t.Parallel()

	t.Run("nil monitor returns true", func(t *testing.T) {
		t.Parallel()
		var mon *Monitor
		if !mon.acquireGuestMetadataSlot(context.Background()) {
			t.Fatal("nil monitor should return true (permissive default)")
		}
	})

	t.Run("nil slots channel returns true", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{guestMetadataSlots: nil}
		if !mon.acquireGuestMetadataSlot(context.Background()) {
			t.Fatal("nil slots channel should return true (permissive default)")
		}
	})

	t.Run("successfully acquires slot when available", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{guestMetadataSlots: make(chan struct{}, 2)}
		if !mon.acquireGuestMetadataSlot(context.Background()) {
			t.Fatal("should acquire slot when channel has capacity")
		}
		// One token should now occupy the channel.
		if got := len(mon.guestMetadataSlots); got != 1 {
			t.Fatalf("expected 1 slot acquired, got %d", got)
		}
	})

	t.Run("context cancellation returns false when slot not available", func(t *testing.T) {
		t.Parallel()
		// Fill the single-slot channel so acquisition cannot proceed.
		mon := &Monitor{guestMetadataSlots: make(chan struct{}, 1)}
		mon.guestMetadataSlots <- struct{}{}
		// A pre-cancelled context must make the acquire fail instead of block.
		ctx, cancel := context.WithCancel(context.Background())
		cancel()
		if mon.acquireGuestMetadataSlot(ctx) {
			t.Fatal("should return false when context is cancelled and slot not available")
		}
	})

	t.Run("multiple acquires fill up the channel", func(t *testing.T) {
		t.Parallel()
		const capacity = 3
		mon := &Monitor{guestMetadataSlots: make(chan struct{}, capacity)}
		ctx := context.Background()
		// Take every slot the channel offers.
		for i := 1; i <= capacity; i++ {
			if !mon.acquireGuestMetadataSlot(ctx) {
				t.Fatalf("should acquire slot %d of %d", i, capacity)
			}
		}
		if got := len(mon.guestMetadataSlots); got != capacity {
			t.Fatalf("expected %d slots acquired, got %d", capacity, got)
		}
		// The channel is full; a cancelled context proves the next acquire fails.
		blocked, cancel := context.WithCancel(context.Background())
		cancel()
		if mon.acquireGuestMetadataSlot(blocked) {
			t.Fatal("should return false when channel is full and context cancelled")
		}
	})
}
// TestTryReserveGuestMetadataFetch covers the per-key reservation logic of
// Monitor.tryReserveGuestMetadataFetch: nil safety, new-key reservation,
// future/past/exact expiry handling, and the default hold duration fallback.
func TestTryReserveGuestMetadataFetch(t *testing.T) {
	t.Parallel()

	t.Run("nil monitor returns false", func(t *testing.T) {
		t.Parallel()
		var mon *Monitor
		if mon.tryReserveGuestMetadataFetch("test-key", time.Now()) {
			t.Fatal("nil monitor should return false")
		}
	})

	t.Run("key not in map reserves and returns true", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:      make(map[string]time.Time),
			guestMetadataHoldDuration: 15 * time.Second,
		}
		base := time.Now()
		if !mon.tryReserveGuestMetadataFetch("new-key", base) {
			t.Fatal("should return true for new key")
		}
		// The reservation must land in the limiter map with hold-duration expiry.
		got, ok := mon.guestMetadataLimiter["new-key"]
		if !ok {
			t.Fatal("key should be in limiter map")
		}
		want := base.Add(15 * time.Second)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v, got %v", want, got)
		}
	})

	t.Run("key in map with future time returns false", func(t *testing.T) {
		t.Parallel()
		base := time.Now()
		mon := &Monitor{
			guestMetadataLimiter: map[string]time.Time{
				"existing-key": base.Add(10 * time.Second), // still held
			},
			guestMetadataHoldDuration: 15 * time.Second,
		}
		if mon.tryReserveGuestMetadataFetch("existing-key", base) {
			t.Fatal("should return false when key has future expiry")
		}
		// A rejected reservation must leave the stored expiry untouched.
		if got := mon.guestMetadataLimiter["existing-key"]; !got.Equal(base.Add(10 * time.Second)) {
			t.Fatal("expiry time should not have been modified")
		}
	})

	t.Run("key in map with past time reserves and returns true", func(t *testing.T) {
		t.Parallel()
		base := time.Now()
		mon := &Monitor{
			guestMetadataLimiter: map[string]time.Time{
				"expired-key": base.Add(-5 * time.Second), // already lapsed
			},
			guestMetadataHoldDuration: 20 * time.Second,
		}
		if !mon.tryReserveGuestMetadataFetch("expired-key", base) {
			t.Fatal("should return true when key has past expiry")
		}
		got := mon.guestMetadataLimiter["expired-key"]
		want := base.Add(20 * time.Second)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v, got %v", want, got)
		}
	})

	t.Run("default hold duration used when guestMetadataHoldDuration is zero", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:      make(map[string]time.Time),
			guestMetadataHoldDuration: 0, // falls back to defaultGuestMetadataHold
		}
		base := time.Now()
		if !mon.tryReserveGuestMetadataFetch("key", base) {
			t.Fatal("should return true for new key")
		}
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(defaultGuestMetadataHold)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (default hold), got %v", want, got)
		}
	})

	t.Run("default hold duration used when guestMetadataHoldDuration is negative", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:      make(map[string]time.Time),
			guestMetadataHoldDuration: -5 * time.Second, // falls back to default
		}
		base := time.Now()
		if !mon.tryReserveGuestMetadataFetch("key", base) {
			t.Fatal("should return true for new key")
		}
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(defaultGuestMetadataHold)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (default hold), got %v", want, got)
		}
	})

	t.Run("key at exact current time reserves and returns true", func(t *testing.T) {
		t.Parallel()
		base := time.Now()
		mon := &Monitor{
			guestMetadataLimiter: map[string]time.Time{
				"exact-key": base, // boundary: expiry == now
			},
			guestMetadataHoldDuration: 10 * time.Second,
		}
		// now.Before(now) is false, so the boundary case must reserve.
		if !mon.tryReserveGuestMetadataFetch("exact-key", base) {
			t.Fatal("should return true when key expires exactly at now")
		}
	})
}
// TestScheduleNextGuestMetadataFetch covers Monitor.scheduleNextGuestMetadataFetch:
// nil safety, the default/configured refresh interval, jitter application when
// an RNG is present, and overwriting of an existing key.
func TestScheduleNextGuestMetadataFetch(t *testing.T) {
	t.Parallel()

	t.Run("nil monitor is safe", func(t *testing.T) {
		t.Parallel()
		var mon *Monitor
		mon.scheduleNextGuestMetadataFetch("test-key", time.Now()) // must not panic
	})

	t.Run("schedules with default interval when guestMetadataMinRefresh is zero", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:    make(map[string]time.Time),
			guestMetadataMinRefresh: 0, // falls back to the default interval
		}
		base := time.Now()
		mon.scheduleNextGuestMetadataFetch("key", base)
		got, ok := mon.guestMetadataLimiter["key"]
		if !ok {
			t.Fatal("key should be in limiter map")
		}
		// config.DefaultGuestMetadataMinRefresh is 2 minutes.
		want := base.Add(2 * time.Minute)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (default interval), got %v", want, got)
		}
	})

	t.Run("schedules with default interval when guestMetadataMinRefresh is negative", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:    make(map[string]time.Time),
			guestMetadataMinRefresh: -1 * time.Minute, // falls back to default
		}
		base := time.Now()
		mon.scheduleNextGuestMetadataFetch("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(2 * time.Minute) // DefaultGuestMetadataMinRefresh
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (default interval), got %v", want, got)
		}
	})

	t.Run("uses configured interval when positive", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:    make(map[string]time.Time),
			guestMetadataMinRefresh: 5 * time.Minute,
		}
		base := time.Now()
		mon.scheduleNextGuestMetadataFetch("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(5 * time.Minute)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v, got %v", want, got)
		}
	})

	t.Run("adds jitter when rng is non-nil and jitter is positive", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:       make(map[string]time.Time),
			guestMetadataMinRefresh:    1 * time.Minute,
			guestMetadataRefreshJitter: 30 * time.Second,
			rng:                        newDeterministicRng(42), // reproducible jitter
		}
		base := time.Now()
		mon.scheduleNextGuestMetadataFetch("key", base)
		got := mon.guestMetadataLimiter["key"]
		// With jitter the result lands in [interval, interval+jitter].
		lo := base.Add(1 * time.Minute)
		hi := base.Add(1*time.Minute + 30*time.Second)
		if got.Before(lo) || got.After(hi) {
			t.Fatalf("expected next time between %v and %v, got %v", lo, hi, got)
		}
	})

	t.Run("no jitter when rng is nil", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:       make(map[string]time.Time),
			guestMetadataMinRefresh:    1 * time.Minute,
			guestMetadataRefreshJitter: 30 * time.Second, // configured, but no RNG
			rng:                        nil,
		}
		base := time.Now()
		mon.scheduleNextGuestMetadataFetch("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(1 * time.Minute) // jitter skipped without an RNG
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (no jitter), got %v", want, got)
		}
	})

	t.Run("no jitter when jitter duration is zero", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:       make(map[string]time.Time),
			guestMetadataMinRefresh:    1 * time.Minute,
			guestMetadataRefreshJitter: 0,
			rng:                        newDeterministicRng(42),
		}
		base := time.Now()
		mon.scheduleNextGuestMetadataFetch("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(1 * time.Minute)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (no jitter), got %v", want, got)
		}
	})

	t.Run("no jitter when jitter duration is negative", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:       make(map[string]time.Time),
			guestMetadataMinRefresh:    1 * time.Minute,
			guestMetadataRefreshJitter: -10 * time.Second,
			rng:                        newDeterministicRng(42),
		}
		base := time.Now()
		mon.scheduleNextGuestMetadataFetch("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(1 * time.Minute)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (no jitter), got %v", want, got)
		}
	})

	t.Run("overwrites existing key", func(t *testing.T) {
		t.Parallel()
		base := time.Now()
		mon := &Monitor{
			guestMetadataLimiter: map[string]time.Time{
				"key": base.Add(-10 * time.Minute), // stale entry to be replaced
			},
			guestMetadataMinRefresh: 3 * time.Minute,
		}
		mon.scheduleNextGuestMetadataFetch("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(3 * time.Minute)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v, got %v", want, got)
		}
	})
}
// TestDeferGuestMetadataRetry covers Monitor.deferGuestMetadataRetry:
// nil safety, the default retry backoff fallback, a configured backoff,
// and overwriting of an existing limiter entry.
func TestDeferGuestMetadataRetry(t *testing.T) {
	t.Parallel()

	t.Run("nil monitor is safe", func(t *testing.T) {
		t.Parallel()
		var mon *Monitor
		mon.deferGuestMetadataRetry("test-key", time.Now()) // must not panic
	})

	t.Run("uses default backoff when guestMetadataRetryBackoff is zero", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:      make(map[string]time.Time),
			guestMetadataRetryBackoff: 0, // falls back to the default backoff
		}
		base := time.Now()
		mon.deferGuestMetadataRetry("key", base)
		got, ok := mon.guestMetadataLimiter["key"]
		if !ok {
			t.Fatal("key should be in limiter map")
		}
		// config.DefaultGuestMetadataRetryBackoff is 30 seconds.
		want := base.Add(30 * time.Second)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (default backoff), got %v", want, got)
		}
	})

	t.Run("uses default backoff when guestMetadataRetryBackoff is negative", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:      make(map[string]time.Time),
			guestMetadataRetryBackoff: -10 * time.Second, // falls back to default
		}
		base := time.Now()
		mon.deferGuestMetadataRetry("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(30 * time.Second) // DefaultGuestMetadataRetryBackoff
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v (default backoff), got %v", want, got)
		}
	})

	t.Run("uses configured backoff when positive", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{
			guestMetadataLimiter:      make(map[string]time.Time),
			guestMetadataRetryBackoff: 45 * time.Second,
		}
		base := time.Now()
		mon.deferGuestMetadataRetry("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(45 * time.Second)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v, got %v", want, got)
		}
	})

	t.Run("overwrites existing key", func(t *testing.T) {
		t.Parallel()
		base := time.Now()
		mon := &Monitor{
			guestMetadataLimiter: map[string]time.Time{
				"key": base.Add(-5 * time.Minute), // stale entry to be replaced
			},
			guestMetadataRetryBackoff: 1 * time.Minute,
		}
		mon.deferGuestMetadataRetry("key", base)
		got := mon.guestMetadataLimiter["key"]
		want := base.Add(1 * time.Minute)
		if d := got.Sub(want); d > time.Millisecond || d < -time.Millisecond {
			t.Fatalf("expected next time ~%v, got %v", want, got)
		}
	})
}
// newDeterministicRng creates a rand.Rand with a fixed seed for reproducible tests.
func newDeterministicRng(seed int64) *rand.Rand {
return rand.New(rand.NewSource(seed))
}
// TestReleaseGuestMetadataSlot verifies nil safety and the non-blocking
// release behaviour of Monitor.releaseGuestMetadataSlot.
func TestReleaseGuestMetadataSlot(t *testing.T) {
	t.Parallel()

	t.Run("nil monitor is safe", func(t *testing.T) {
		t.Parallel()
		var mon *Monitor
		mon.releaseGuestMetadataSlot() // must not panic
	})

	t.Run("nil slots channel is safe", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{guestMetadataSlots: nil}
		mon.releaseGuestMetadataSlot() // must not panic
	})

	t.Run("successfully releases a slot", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{guestMetadataSlots: make(chan struct{}, 2)}
		// Take one slot so there is something to give back.
		if !mon.acquireGuestMetadataSlot(context.Background()) {
			t.Fatal("failed to acquire slot")
		}
		if got := len(mon.guestMetadataSlots); got != 1 {
			t.Fatalf("expected 1 slot acquired, got %d", got)
		}
		mon.releaseGuestMetadataSlot()
		if got := len(mon.guestMetadataSlots); got != 0 {
			t.Fatalf("expected 0 slots after release, got %d", got)
		}
	})

	t.Run("release on empty channel does not block", func(t *testing.T) {
		t.Parallel()
		mon := &Monitor{guestMetadataSlots: make(chan struct{}, 2)}
		// Releasing with nothing held must return immediately (non-blocking select).
		released := make(chan struct{})
		go func() {
			defer close(released)
			mon.releaseGuestMetadataSlot()
		}()
		select {
		case <-released:
			// Completed without blocking.
		case <-time.After(100 * time.Millisecond):
			t.Fatal("releaseGuestMetadataSlot blocked on empty channel")
		}
	})
}