mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 19:41:17 +00:00
1023 lines
35 KiB
Go
1023 lines
35 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/models"
|
|
"github.com/rcourtman/pulse-go-rewrite/pkg/proxmox"
|
|
)
|
|
|
|
// slowGuestAgentClusterClient is a test double whose guest-agent filesystem
// queries take fsDelay to answer, used to exercise poll-budget handling.
// stubPVEClient supplies defaults for the rest of the client surface.
type slowGuestAgentClusterClient struct {
	stubPVEClient
	resources []proxmox.ClusterResource // canned cluster resource list
	fsDelay   time.Duration             // artificial latency for GetVMFSInfo
}
|
|
|
|
// emptyFSInfoClusterClient is a test double whose guest agent reports no
// filesystems at all, used to exercise "disk unknown" fallback paths.
type emptyFSInfoClusterClient struct {
	stubPVEClient
	resources []proxmox.ClusterResource // canned cluster resource list
}
|
|
|
|
// repeatedLowTrustMemoryClusterClient is a test double that returns a fixed
// per-VMID status map, used to simulate VMs that persistently report
// low-trust (fully-used) memory figures.
type repeatedLowTrustMemoryClusterClient struct {
	stubPVEClient
	resources  []proxmox.ClusterResource // canned cluster resource list
	vmStatuses map[int]*proxmox.VMStatus // status keyed by VMID
}
|
|
|
|
// rotatingGuestAgentClusterClient is a test double with slow filesystem
// queries, used to verify that guest-agent query priority rotates across
// polls when the per-poll budget only covers some of the VMs.
type rotatingGuestAgentClusterClient struct {
	stubPVEClient
	resources []proxmox.ClusterResource // canned cluster resource list
	fsDelay   time.Duration             // artificial latency for GetVMFSInfo
}
|
|
|
|
// transientStatusFailureClusterClient is a test double whose per-VM status
// endpoint always fails (deadline exceeded) while the guest-agent endpoints
// keep working, simulating a transient status outage.
type transientStatusFailureClusterClient struct {
	stubPVEClient
	resources []proxmox.ClusterResource // canned cluster resource list
}
|
|
|
|
// healthyGuestLowTrustMemoryClusterClient is a test double for a guest whose
// agent metadata endpoints are healthy but whose memory reads are low-trust:
// status reports memory fully used and the agent meminfo query fails.
type healthyGuestLowTrustMemoryClusterClient struct {
	stubPVEClient
	resources []proxmox.ClusterResource // canned cluster resource list
}
|
|
|
|
// windowsDriveClusterClient is a test double that reports Windows-style NTFS
// volumes (a C: drive plus a System Reserved partition), used to verify
// drive-root normalization and filtering.
type windowsDriveClusterClient struct {
	stubPVEClient
	resources []proxmox.ClusterResource // canned cluster resource list
}
|
|
|
|
// GetClusterResources returns the canned resource list; resourceType is ignored.
func (c *slowGuestAgentClusterClient) GetClusterResources(ctx context.Context, resourceType string) ([]proxmox.ClusterResource, error) {
	return c.resources, nil
}
|
|
|
|
// GetVMStatus reports a fixed half-used memory reading with the guest agent
// flagged as enabled, for every node/vmid.
func (c *slowGuestAgentClusterClient) GetVMStatus(ctx context.Context, node string, vmid int) (*proxmox.VMStatus, error) {
	return &proxmox.VMStatus{
		MaxMem: 8 * 1024,
		Mem:    4 * 1024,
		Agent:  proxmox.VMAgentField{Value: 1},
	}, nil
}
|
|
|
|
func (c *slowGuestAgentClusterClient) GetVMFSInfo(ctx context.Context, node string, vmid int) ([]proxmox.VMFileSystem, error) {
|
|
select {
|
|
case <-time.After(c.fsDelay):
|
|
case <-ctx.Done():
|
|
return nil, ctx.Err()
|
|
}
|
|
return []proxmox.VMFileSystem{{
|
|
Mountpoint: "/",
|
|
Type: "ext4",
|
|
TotalBytes: 100 * 1024 * 1024 * 1024,
|
|
UsedBytes: 40 * 1024 * 1024 * 1024,
|
|
Disk: "/dev/vda",
|
|
}}, nil
|
|
}
|
|
|
|
// GetClusterResources returns the canned resource list; resourceType is ignored.
func (c *emptyFSInfoClusterClient) GetClusterResources(ctx context.Context, resourceType string) ([]proxmox.ClusterResource, error) {
	return c.resources, nil
}
|
|
|
|
// GetVMStatus reports a fixed half-used memory reading with the guest agent
// flagged as enabled, for every node/vmid.
func (c *emptyFSInfoClusterClient) GetVMStatus(ctx context.Context, node string, vmid int) (*proxmox.VMStatus, error) {
	return &proxmox.VMStatus{
		MaxMem: 8 * 1024,
		Mem:    4 * 1024,
		Agent:  proxmox.VMAgentField{Value: 1},
	}, nil
}
|
|
|
|
func (c *emptyFSInfoClusterClient) GetVMFSInfo(ctx context.Context, node string, vmid int) ([]proxmox.VMFileSystem, error) {
|
|
return []proxmox.VMFileSystem{}, nil
|
|
}
|
|
|
|
// GetClusterResources returns the canned resource list; resourceType is ignored.
func (c *repeatedLowTrustMemoryClusterClient) GetClusterResources(ctx context.Context, resourceType string) ([]proxmox.ClusterResource, error) {
	return c.resources, nil
}
|
|
|
|
// GetVMStatus returns the canned status for vmid from the vmStatuses map.
// NOTE(review): an unknown VMID yields (nil, nil) — a nil status with no
// error. Callers must tolerate that combination; confirm the poller does.
func (c *repeatedLowTrustMemoryClusterClient) GetVMStatus(ctx context.Context, node string, vmid int) (*proxmox.VMStatus, error) {
	if status, ok := c.vmStatuses[vmid]; ok {
		return status, nil
	}
	return nil, nil
}
|
|
|
|
// GetClusterResources returns the canned resource list; resourceType is ignored.
func (c *rotatingGuestAgentClusterClient) GetClusterResources(ctx context.Context, resourceType string) ([]proxmox.ClusterResource, error) {
	return c.resources, nil
}
|
|
|
|
// GetVMStatus reports a fixed half-used memory reading with the guest agent
// flagged as enabled, for every node/vmid.
func (c *rotatingGuestAgentClusterClient) GetVMStatus(ctx context.Context, node string, vmid int) (*proxmox.VMStatus, error) {
	return &proxmox.VMStatus{
		MaxMem: 8 * 1024,
		Mem:    4 * 1024,
		Agent:  proxmox.VMAgentField{Value: 1},
	}, nil
}
|
|
|
|
func (c *rotatingGuestAgentClusterClient) GetVMFSInfo(ctx context.Context, node string, vmid int) ([]proxmox.VMFileSystem, error) {
|
|
select {
|
|
case <-time.After(c.fsDelay):
|
|
case <-ctx.Done():
|
|
return nil, ctx.Err()
|
|
}
|
|
|
|
return []proxmox.VMFileSystem{{
|
|
Mountpoint: "/",
|
|
Type: "ext4",
|
|
TotalBytes: 100 * 1024 * 1024 * 1024,
|
|
UsedBytes: 40 * 1024 * 1024 * 1024,
|
|
Disk: "/dev/vda",
|
|
}}, nil
|
|
}
|
|
|
|
// GetVMNetworkInterfaces reports no interfaces and no error.
func (c *rotatingGuestAgentClusterClient) GetVMNetworkInterfaces(ctx context.Context, node string, vmid int) ([]proxmox.VMNetworkInterface, error) {
	return nil, nil
}
|
|
|
|
// GetVMAgentInfo reports no OS info and no error.
func (c *rotatingGuestAgentClusterClient) GetVMAgentInfo(ctx context.Context, node string, vmid int) (map[string]interface{}, error) {
	return nil, nil
}
|
|
|
|
// GetVMAgentVersion reports an empty version string and no error.
func (c *rotatingGuestAgentClusterClient) GetVMAgentVersion(ctx context.Context, node string, vmid int) (string, error) {
	return "", nil
}
|
|
|
|
// GetClusterResources returns the canned resource list; resourceType is ignored.
func (c *transientStatusFailureClusterClient) GetClusterResources(ctx context.Context, resourceType string) ([]proxmox.ClusterResource, error) {
	return c.resources, nil
}
|
|
|
|
// GetVMStatus always fails with context.DeadlineExceeded, simulating a
// transient per-VM status outage while other endpoints stay healthy.
func (c *transientStatusFailureClusterClient) GetVMStatus(ctx context.Context, node string, vmid int) (*proxmox.VMStatus, error) {
	return nil, context.DeadlineExceeded
}
|
|
|
|
func (c *transientStatusFailureClusterClient) GetVMFSInfo(ctx context.Context, node string, vmid int) ([]proxmox.VMFileSystem, error) {
|
|
return []proxmox.VMFileSystem{{
|
|
Mountpoint: "/",
|
|
Type: "ext4",
|
|
TotalBytes: 100 * 1024 * 1024 * 1024,
|
|
UsedBytes: 40 * 1024 * 1024 * 1024,
|
|
Disk: "/dev/vda",
|
|
}}, nil
|
|
}
|
|
|
|
func (c *transientStatusFailureClusterClient) GetVMNetworkInterfaces(ctx context.Context, node string, vmid int) ([]proxmox.VMNetworkInterface, error) {
|
|
return []proxmox.VMNetworkInterface{
|
|
{
|
|
Name: "Ethernet0",
|
|
HardwareAddr: "00:11:22:33:44:55",
|
|
IPAddresses: []proxmox.VMIpAddress{
|
|
{Address: "192.168.1.50", Prefix: 24},
|
|
},
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// GetVMAgentInfo reports fixed Ubuntu OS metadata.
func (c *transientStatusFailureClusterClient) GetVMAgentInfo(ctx context.Context, node string, vmid int) (map[string]interface{}, error) {
	return map[string]interface{}{
		"pretty-name": "Ubuntu 24.04",
		"version":     "24.04",
	}, nil
}
|
|
|
|
// GetVMAgentVersion reports a fixed agent version.
func (c *transientStatusFailureClusterClient) GetVMAgentVersion(ctx context.Context, node string, vmid int) (string, error) {
	return "8.2.0", nil
}
|
|
|
|
// GetVMMemAvailableFromAgent reports a fixed available-memory figure so the
// poller can fall back to guest-agent meminfo when the status call fails.
func (c *transientStatusFailureClusterClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
	return 5 * 1024, nil
}
|
|
|
|
// GetClusterResources returns the canned resource list; resourceType is ignored.
func (c *healthyGuestLowTrustMemoryClusterClient) GetClusterResources(ctx context.Context, resourceType string) ([]proxmox.ClusterResource, error) {
	return c.resources, nil
}
|
|
|
|
// GetVMStatus reports a running guest whose memory appears 100% used
// (Mem == MaxMem), which is the low-trust reading the poller should
// refuse to accept without agent confirmation.
func (c *healthyGuestLowTrustMemoryClusterClient) GetVMStatus(ctx context.Context, node string, vmid int) (*proxmox.VMStatus, error) {
	const total = uint64(8 << 30)
	return &proxmox.VMStatus{
		Status: "running",
		Agent:  proxmox.VMAgentField{Value: 1},
		MaxMem: total,
		Mem:    total,
	}, nil
}
|
|
|
|
func (c *healthyGuestLowTrustMemoryClusterClient) GetVMNetworkInterfaces(ctx context.Context, node string, vmid int) ([]proxmox.VMNetworkInterface, error) {
|
|
return []proxmox.VMNetworkInterface{
|
|
{
|
|
Name: "Ethernet0",
|
|
HardwareAddr: "00:11:22:33:44:55",
|
|
IPAddresses: []proxmox.VMIpAddress{
|
|
{Address: "192.168.1.50", Prefix: 24},
|
|
},
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// GetVMAgentInfo reports fixed Ubuntu OS metadata.
func (c *healthyGuestLowTrustMemoryClusterClient) GetVMAgentInfo(ctx context.Context, node string, vmid int) (map[string]interface{}, error) {
	return map[string]interface{}{
		"name":           "Ubuntu",
		"version-id":     "24.04",
		"pretty-name":    "Ubuntu 24.04",
		"version":        "24.04",
		"kernel-release": "6.8.0",
	}, nil
}
|
|
|
|
// GetVMAgentVersion reports a fixed agent version.
func (c *healthyGuestLowTrustMemoryClusterClient) GetVMAgentVersion(ctx context.Context, node string, vmid int) (string, error) {
	return "8.2.0", nil
}
|
|
|
|
// GetVMMemAvailableFromAgent always fails with context.DeadlineExceeded, so
// the poller cannot use agent meminfo to correct the low-trust status reading.
func (c *healthyGuestLowTrustMemoryClusterClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
	return 0, context.DeadlineExceeded
}
|
|
|
|
// GetClusterResources returns the canned resource list; resourceType is ignored.
func (c *windowsDriveClusterClient) GetClusterResources(ctx context.Context, resourceType string) ([]proxmox.ClusterResource, error) {
	return c.resources, nil
}
|
|
|
|
// GetVMStatus reports a running guest with a fixed half-used memory reading
// and the guest agent flagged as enabled.
func (c *windowsDriveClusterClient) GetVMStatus(ctx context.Context, node string, vmid int) (*proxmox.VMStatus, error) {
	return &proxmox.VMStatus{
		Status: "running",
		MaxMem: 8 * 1024,
		Mem:    4 * 1024,
		Agent:  proxmox.VMAgentField{Value: 1},
	}, nil
}
|
|
|
|
func (c *windowsDriveClusterClient) GetVMFSInfo(ctx context.Context, node string, vmid int) ([]proxmox.VMFileSystem, error) {
|
|
return []proxmox.VMFileSystem{
|
|
{
|
|
Mountpoint: "C:",
|
|
Type: "NTFS",
|
|
TotalBytes: 100 * 1024 * 1024 * 1024,
|
|
UsedBytes: 57 * 1024 * 1024 * 1024,
|
|
Disk: "C:",
|
|
},
|
|
{
|
|
Mountpoint: "System Reserved",
|
|
Type: "NTFS",
|
|
TotalBytes: 500 * 1024 * 1024,
|
|
UsedBytes: 150 * 1024 * 1024,
|
|
Disk: "system-reserved",
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// TestGuestAgentFSInfoBudgetHonorsConfiguredTimeouts verifies that the
// per-poll FSInfo budget scales with the configured timeout and retry count:
// with a 15s timeout and one retry the budget must be at least 30s.
func TestGuestAgentFSInfoBudgetHonorsConfiguredTimeouts(t *testing.T) {
	t.Parallel()

	m := &Monitor{
		guestAgentFSInfoTimeout: 15 * time.Second,
		guestAgentRetries:       1,
	}

	budget := m.guestAgentFSInfoBudget()
	if budget < 30*time.Second {
		t.Fatalf("guestAgentFSInfoBudget() = %s, want at least 30s", budget)
	}
}
|
|
|
|
// TestRotateIndexedClusterResources verifies that rotating by one shifts the
// VMID order to [101 102 100] and that the input slice is left untouched
// (the rotation must copy, not mutate).
func TestRotateIndexedClusterResources(t *testing.T) {
	t.Parallel()

	original := []indexedClusterResource{
		{order: 0, resource: proxmox.ClusterResource{VMID: 100}},
		{order: 1, resource: proxmox.ClusterResource{VMID: 101}},
		{order: 2, resource: proxmox.ClusterResource{VMID: 102}},
	}

	rotated := rotateIndexedClusterResources(original, 1)
	if got := []int{rotated[0].resource.VMID, rotated[1].resource.VMID, rotated[2].resource.VMID}; got[0] != 101 || got[1] != 102 || got[2] != 100 {
		t.Fatalf("rotateIndexedClusterResources(..., 1) VMIDs = %v, want [101 102 100]", got)
	}

	// The original slice must be unchanged after rotation.
	if original[0].resource.VMID != 100 || original[1].resource.VMID != 101 || original[2].resource.VMID != 102 {
		t.Fatal("rotateIndexedClusterResources should not mutate the original slice")
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientCompletesDiskQueriesWithinPollBudget
// verifies that with four work slots, guest-agent disk queries for all four
// VMs complete inside a 180ms poll window even though each FSInfo call takes
// 60ms, so every VM ends up with real disk data.
func TestPollVMsAndContainersEfficientCompletesDiskQueriesWithinPollBudget(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	// Four running VMs, each answering FSInfo after a 60ms delay.
	client := &slowGuestAgentClusterClient{
		fsDelay: 60 * time.Millisecond,
		resources: []proxmox.ClusterResource{
			{Type: "qemu", Node: "node1", VMID: 100, Name: "vm100", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
			{Type: "qemu", Node: "node1", VMID: 101, Name: "vm101", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
			{Type: "qemu", Node: "node1", VMID: 102, Name: "vm102", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
			{Type: "qemu", Node: "node1", VMID: 103, Name: "vm103", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	mon.guestAgentFSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentNetworkTimeout = 250 * time.Millisecond
	mon.guestAgentOSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentVersionTimeout = 250 * time.Millisecond
	mon.guestAgentRetries = 0
	// Four slots allow all FSInfo queries to run concurrently.
	mon.guestAgentWorkSlots = make(chan struct{}, 4)

	// Poll deadline is tighter than 4 serial 60ms queries would need.
	ctx, cancel := context.WithTimeout(context.Background(), 180*time.Millisecond)
	defer cancel()

	if ok := mon.pollVMsAndContainersEfficient(ctx, "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
		t.Fatal("pollVMsAndContainersEfficient() returned false")
	}

	state := mon.state.GetSnapshot()
	if len(state.VMs) != 4 {
		t.Fatalf("expected 4 VMs, got %d", len(state.VMs))
	}
	// Every VM must carry a real guest-agent disk reading.
	for _, vm := range state.VMs {
		if vm.Disk.Total <= 0 || vm.Disk.Usage <= 0 {
			t.Fatalf("expected guest-agent disk data for %s, got total=%d usage=%.2f", vm.Name, vm.Disk.Total, vm.Disk.Usage)
		}
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientRotatesGuestAgentPriorityAcrossPolls
// verifies that with a single work slot and a poll deadline that only covers
// one 60ms FSInfo query, the poller rotates which VM is queried first, so
// each of the three VMs receives a real disk reading on successive polls.
func TestPollVMsAndContainersEfficientRotatesGuestAgentPriorityAcrossPolls(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	client := &rotatingGuestAgentClusterClient{
		fsDelay: 60 * time.Millisecond,
		resources: []proxmox.ClusterResource{
			{Type: "qemu", Node: "node1", VMID: 100, Name: "vm100", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
			{Type: "qemu", Node: "node1", VMID: 101, Name: "vm101", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
			{Type: "qemu", Node: "node1", VMID: 102, Name: "vm102", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	// One slot serializes guest-agent queries so only one VM resolves per poll.
	mon.guestAgentWorkSlots = make(chan struct{}, 1)
	mon.guestAgentFSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentNetworkTimeout = 250 * time.Millisecond
	mon.guestAgentOSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentVersionTimeout = 250 * time.Millisecond
	mon.guestAgentRetries = 0

	// checkResolved asserts the expected VM got a real disk reading this poll.
	checkResolved := func(expectedVMID int) {
		state := mon.state.GetSnapshot()
		if len(state.VMs) != 3 {
			t.Fatalf("expected 3 VMs, got %d", len(state.VMs))
		}

		vmByID := make(map[int]models.VM, len(state.VMs))
		for _, vm := range state.VMs {
			vmByID[vm.VMID] = vm
		}

		if vmByID[expectedVMID].Disk.Usage <= 0 {
			t.Fatalf("expected VM %d to get a real disk reading, got usage=%.2f reason=%q", expectedVMID, vmByID[expectedVMID].Disk.Usage, vmByID[expectedVMID].DiskStatusReason)
		}
	}

	// 75ms covers a single 60ms query; each pass should resolve the next VM.
	for _, expectedVMID := range []int{100, 101, 102} {
		ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond)
		if ok := mon.pollVMsAndContainersEfficient(ctx, "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
			cancel()
			t.Fatal("pollVMsAndContainersEfficient() returned false")
		}
		cancel()
		checkResolved(expectedVMID)
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientPreservesCachedGuestMetadataWhenStatusUnavailable
// verifies that when the per-VM status call fails, previously cached guest
// metadata (IPs, interfaces, OS info, agent version) is preserved in the
// snapshot rather than being dropped.
func TestPollVMsAndContainersEfficientPreservesCachedGuestMetadataWhenStatusUnavailable(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	client := &transientStatusFailureClusterClient{
		resources: []proxmox.ClusterResource{
			{Type: "qemu", Node: "node1", VMID: 100, Name: "vm100", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	// Seed the metadata cache with a recent entry for the VM under test.
	mon.guestMetadataCache = map[string]guestMetadataCacheEntry{
		guestMetadataCacheKey("pve1", "node1", 100): {
			ipAddresses: []string{"192.168.1.50"},
			networkInterfaces: []models.GuestNetworkInterface{
				{Name: "Ethernet0", MAC: "00:11:22:33:44:55", Addresses: []string{"192.168.1.50"}},
			},
			osName:       "Windows",
			osVersion:    "Server 2022",
			agentVersion: "8.2.0",
			fetchedAt:    time.Now(),
		},
	}
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	mon.guestAgentFSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentNetworkTimeout = 250 * time.Millisecond
	mon.guestAgentOSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentVersionTimeout = 250 * time.Millisecond
	mon.guestAgentRetries = 0
	mon.guestAgentWorkSlots = make(chan struct{}, 1)

	if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
		t.Fatal("pollVMsAndContainersEfficient() returned false")
	}

	state := mon.state.GetSnapshot()
	if len(state.VMs) != 1 {
		t.Fatalf("expected 1 VM, got %d", len(state.VMs))
	}

	// All cached metadata fields must survive the failed status call.
	vm := state.VMs[0]
	if len(vm.IPAddresses) != 1 || vm.IPAddresses[0] != "192.168.1.50" {
		t.Fatalf("expected cached IPs to be preserved, got %#v", vm.IPAddresses)
	}
	if len(vm.NetworkInterfaces) != 1 || vm.NetworkInterfaces[0].Name != "Ethernet0" {
		t.Fatalf("expected cached interfaces to be preserved, got %#v", vm.NetworkInterfaces)
	}
	if vm.OSName != "Windows" || vm.OSVersion != "Server 2022" {
		t.Fatalf("expected cached OS info to be preserved, got %q %q", vm.OSName, vm.OSVersion)
	}
	if vm.AgentVersion != "8.2.0" {
		t.Fatalf("expected cached agent version to be preserved, got %q", vm.AgentVersion)
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientContinuesGuestAgentQueriesAfterTransientStatusFailure
// verifies that a failing per-VM status call does not abort the remaining
// guest-agent queries: memory falls back to agent meminfo, disk/network/agent
// metadata are refreshed live, and stale values from the previous snapshot
// are replaced.
func TestPollVMsAndContainersEfficientContinuesGuestAgentQueriesAfterTransientStatusFailure(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	client := &transientStatusFailureClusterClient{
		resources: []proxmox.ClusterResource{
			{Type: "qemu", Node: "node1", VMID: 100, Name: "vm100", Status: "running", MaxMem: 8 * 1024, Mem: 8 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	mon.guestAgentFSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentNetworkTimeout = 250 * time.Millisecond
	mon.guestAgentOSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentVersionTimeout = 250 * time.Millisecond
	mon.guestAgentRetries = 0
	mon.guestAgentWorkSlots = make(chan struct{}, 1)

	// Seed a previous snapshot with an older agent version so the test can
	// distinguish refreshed data from carried-forward data.
	mon.state.UpdateVMsForInstance("pve1", []models.VM{
		{
			ID:           makeGuestID("pve1", "node1", 100),
			VMID:         100,
			Name:         "vm100",
			Node:         "node1",
			Instance:     "pve1",
			Type:         "qemu",
			Status:       "running",
			AgentVersion: "8.1.0",
			NetworkInterfaces: []models.GuestNetworkInterface{
				{Name: "Ethernet0", MAC: "00:11:22:33:44:55", Addresses: []string{"192.168.1.50"}},
			},
			LastSeen: time.Now(),
		},
	})

	if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
		t.Fatal("pollVMsAndContainersEfficient() returned false")
	}

	state := mon.state.GetSnapshot()
	if len(state.VMs) != 1 {
		t.Fatalf("expected 1 VM, got %d", len(state.VMs))
	}

	vm := state.VMs[0]
	if vm.MemorySource != "guest-agent-meminfo" {
		t.Fatalf("expected guest-agent memory fallback after status failure, got %q", vm.MemorySource)
	}
	if vm.Disk.Usage != 40 {
		t.Fatalf("expected live guest-agent disk usage after status failure, got %.2f", vm.Disk.Usage)
	}
	if vm.DiskStatusReason != "" {
		t.Fatalf("expected empty disk status reason, got %q", vm.DiskStatusReason)
	}
	if len(vm.Disks) != 1 || vm.Disks[0].Device != "/dev/vda" {
		t.Fatalf("expected live guest-agent disk inventory, got %#v", vm.Disks)
	}
	if len(vm.NetworkInterfaces) != 1 || vm.NetworkInterfaces[0].Name != "Ethernet0" {
		t.Fatalf("expected refreshed network interfaces, got %#v", vm.NetworkInterfaces)
	}
	// 8.2.0 comes from the client, proving the version was re-fetched.
	if vm.AgentVersion != "8.2.0" {
		t.Fatalf("expected refreshed agent version, got %q", vm.AgentVersion)
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientKeepsPreviousMemoryForHealthyGuestAfterRepeatedLowTrustFullUsage
// verifies that when a guest with a healthy agent repeatedly reports 100%
// memory usage (a low-trust reading) and the agent meminfo query fails, the
// poller keeps the last trusted memory figure and labels the source
// "previous-snapshot" instead of accepting the suspicious reading.
func TestPollVMsAndContainersEfficientKeepsPreviousMemoryForHealthyGuestAfterRepeatedLowTrustFullUsage(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	const total = uint64(8 << 30)
	const trustedUsed = uint64(3 << 30)

	client := &healthyGuestLowTrustMemoryClusterClient{
		resources: []proxmox.ClusterResource{
			{Type: "qemu", Node: "node1", VMID: 100, Name: "vm100", Status: "running", MaxMem: total, Mem: total, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	mon.guestAgentFSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentNetworkTimeout = 250 * time.Millisecond
	mon.guestAgentOSInfoTimeout = 250 * time.Millisecond
	mon.guestAgentVersionTimeout = 250 * time.Millisecond
	mon.guestAgentRetries = 0
	mon.guestAgentWorkSlots = make(chan struct{}, 1)

	// Seed a previous snapshot holding the last trusted memory reading.
	mon.state.UpdateVMsForInstance("pve1", []models.VM{
		{
			ID:           makeGuestID("pve1", "node1", 100),
			VMID:         100,
			Name:         "vm100",
			Node:         "node1",
			Instance:     "pve1",
			Type:         "qemu",
			Status:       "running",
			MemorySource: "guest-agent-meminfo",
			Memory: models.Memory{
				Total: int64(total),
				Used:  int64(trustedUsed),
				Free:  int64(total - trustedUsed),
				Usage: safePercentage(float64(trustedUsed), float64(total)),
			},
			LastSeen: time.Now(),
		},
	})

	// Two polls: the low-trust reading must be rejected repeatedly, not just once.
	for i := 0; i < 2; i++ {
		if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
			t.Fatalf("pollVMsAndContainersEfficient() returned false on pass %d", i+1)
		}
	}

	state := mon.state.GetSnapshot()
	if len(state.VMs) != 1 {
		t.Fatalf("expected 1 VM, got %d", len(state.VMs))
	}

	vm := state.VMs[0]
	if vm.MemorySource != "previous-snapshot" {
		t.Fatalf("memory source = %q, want previous-snapshot", vm.MemorySource)
	}
	if vm.Memory.Used != int64(trustedUsed) {
		t.Fatalf("memory used = %d, want preserved %d", vm.Memory.Used, trustedUsed)
	}
	// A populated interface list confirms the guest agent itself was reachable.
	if len(vm.NetworkInterfaces) != 1 || vm.NetworkInterfaces[0].Name != "Ethernet0" {
		t.Fatalf("expected guest agent network metadata to confirm healthy guest, got %#v", vm.NetworkInterfaces)
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientCarriesForwardPreviousIndividualDisks
// verifies that when the guest agent returns an empty filesystem list, the
// previous snapshot's per-disk inventory and aggregate disk usage are carried
// forward, with DiskStatusReason marking the data as stale
// ("prev-no-filesystems").
func TestPollVMsAndContainersEfficientCarriesForwardPreviousIndividualDisks(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	client := &emptyFSInfoClusterClient{
		resources: []proxmox.ClusterResource{
			{Type: "qemu", Node: "node1", VMID: 100, Name: "vm100", Status: "running", MaxMem: 8 * 1024, Mem: 4 * 1024, MaxDisk: 100 * 1024 * 1024 * 1024, MaxCPU: 4},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	mon.guestAgentWorkSlots = make(chan struct{}, 2)

	// Previous snapshot with a real per-disk inventory to carry forward.
	prevVM := models.VM{
		ID:       makeGuestID("pve1", "node1", 100),
		VMID:     100,
		Name:     "vm100",
		Node:     "node1",
		Instance: "pve1",
		Type:     "qemu",
		Status:   "running",
		Disk: models.Disk{
			Total: 100 * 1024 * 1024 * 1024,
			Used:  40 * 1024 * 1024 * 1024,
			Free:  60 * 1024 * 1024 * 1024,
			Usage: 40,
		},
		Disks: []models.Disk{
			{
				Total:      100 * 1024 * 1024 * 1024,
				Used:       40 * 1024 * 1024 * 1024,
				Free:       60 * 1024 * 1024 * 1024,
				Usage:      40,
				Mountpoint: "/",
				Type:       "ext4",
				Device:     "/dev/vda",
			},
		},
	}
	mon.state.UpdateVMs([]models.VM{prevVM})

	if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
		t.Fatal("pollVMsAndContainersEfficient() returned false")
	}

	state := mon.state.GetSnapshot()
	if len(state.VMs) != 1 {
		t.Fatalf("expected 1 VM, got %d", len(state.VMs))
	}

	vm := state.VMs[0]
	if len(vm.Disks) != 1 {
		t.Fatalf("expected previous individual disks to be preserved, got %#v", vm.Disks)
	}
	if vm.Disks[0].Mountpoint != "/" || vm.Disks[0].Device != "/dev/vda" {
		t.Fatalf("unexpected carried-forward disk data: %#v", vm.Disks[0])
	}
	if vm.Disk.Usage != 40 {
		t.Fatalf("expected aggregate disk usage to be carried forward, got %.2f", vm.Disk.Usage)
	}
	// The reason string records that the data is from a previous snapshot.
	if vm.DiskStatusReason != "prev-no-filesystems" {
		t.Fatalf("expected carried-forward disk status reason, got %q", vm.DiskStatusReason)
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientMarksDiskUnknownUntilGuestAgentFilesystemDataArrives
// verifies that with no previous snapshot and an agent reporting zero
// filesystems, disk usage is marked unknown (-1) with reason
// "no-filesystems", and no disk metric samples are recorded.
func TestPollVMsAndContainersEfficientMarksDiskUnknownUntilGuestAgentFilesystemDataArrives(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	client := &emptyFSInfoClusterClient{
		resources: []proxmox.ClusterResource{
			{
				Type:   "qemu",
				Node:   "node1",
				VMID:   100,
				Name:   "vm100",
				Status: "running",
				MaxMem: 8 * 1024,
				Mem:    4 * 1024,
				// Cluster-level disk figures exist but must not be trusted
				// over the (absent) guest-agent filesystem data.
				Disk:    57 * 1024 * 1024 * 1024,
				MaxDisk: 100 * 1024 * 1024 * 1024,
				MaxCPU:  4,
			},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	mon.guestAgentWorkSlots = make(chan struct{}, 2)

	if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
		t.Fatal("pollVMsAndContainersEfficient() returned false")
	}

	state := mon.state.GetSnapshot()
	if len(state.VMs) != 1 {
		t.Fatalf("expected 1 VM, got %d", len(state.VMs))
	}

	vm := state.VMs[0]
	if vm.Disk.Usage != -1 {
		t.Fatalf("expected aggregate disk usage to remain unknown, got %.2f", vm.Disk.Usage)
	}
	if vm.DiskStatusReason != "no-filesystems" {
		t.Fatalf("expected disk status reason %q, got %q", "no-filesystems", vm.DiskStatusReason)
	}

	// Unknown usage must not pollute the metrics history.
	guestMetrics := mon.metricsHistory.GetGuestMetrics(vm.ID, "disk", time.Hour)
	if len(guestMetrics) != 0 {
		t.Fatalf("expected no disk metric samples while disk usage is unknown, got %#v", guestMetrics)
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientUsesLinkedHostAgentDiskFallback verifies
// that when the guest agent reports no filesystems but a Pulse host agent is
// linked to the VM, the host agent's disk inventory is used as fallback and
// the disk status reason is cleared.
func TestPollVMsAndContainersEfficientUsesLinkedHostAgentDiskFallback(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	client := &emptyFSInfoClusterClient{
		resources: []proxmox.ClusterResource{
			{
				Type:    "qemu",
				Node:    "node1",
				VMID:    100,
				Name:    "vm100",
				Status:  "running",
				MaxMem:  8 * 1024,
				Mem:     4 * 1024,
				MaxDisk: 100 * 1024 * 1024 * 1024,
				MaxCPU:  4,
			},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	mon.guestAgentWorkSlots = make(chan struct{}, 2)

	// Register a host agent linked to the VM, carrying real disk data.
	mon.state.UpsertHost(models.Host{
		ID:         "host-100",
		Hostname:   "vm100-agent",
		Status:     "online",
		LinkedVMID: makeGuestID("pve1", "node1", 100),
		Disks: []models.Disk{
			{
				Total:      100 * 1024 * 1024 * 1024,
				Used:       57 * 1024 * 1024 * 1024,
				Free:       43 * 1024 * 1024 * 1024,
				Usage:      57,
				Mountpoint: "C:",
				Type:       "NTFS",
				Device:     "C:",
			},
		},
	})

	if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
		t.Fatal("pollVMsAndContainersEfficient() returned false")
	}

	state := mon.state.GetSnapshot()
	if len(state.VMs) != 1 {
		t.Fatalf("expected 1 VM, got %d", len(state.VMs))
	}

	vm := state.VMs[0]
	if vm.Disk.Usage != 57 {
		t.Fatalf("expected linked host-agent disk usage, got %.2f", vm.Disk.Usage)
	}
	if vm.DiskStatusReason != "" {
		t.Fatalf("expected cleared disk status reason, got %q", vm.DiskStatusReason)
	}
	if len(vm.Disks) != 1 || vm.Disks[0].Mountpoint != "C:" {
		t.Fatalf("expected linked host-agent disk inventory, got %#v", vm.Disks)
	}
}
|
|
|
|
// TestPollVMsAndContainersEfficientKeepsNormalizedWindowsDriveRoots verifies
// that a Windows guest's C: drive is kept (normalized) in the per-disk
// inventory while the "System Reserved" partition is filtered out, and that
// the aggregate disk usage is populated from the usable volume.
func TestPollVMsAndContainersEfficientKeepsNormalizedWindowsDriveRoots(t *testing.T) {
	t.Setenv("PULSE_DATA_DIR", t.TempDir())

	client := &windowsDriveClusterClient{
		resources: []proxmox.ClusterResource{
			{
				Type:   "qemu",
				Node:   "node1",
				VMID:   100,
				Name:   "win100",
				Status: "running",
				MaxMem: 8 * 1024,
				Mem:    4 * 1024,
				// Windows guests commonly report Disk=0 at the cluster level.
				Disk:    0,
				MaxDisk: 100 * 1024 * 1024 * 1024,
				MaxCPU:  4,
			},
		},
	}

	mon := newTestPVEMonitor("pve1")
	defer mon.alertManager.Stop()
	defer mon.notificationMgr.Stop()

	mon.rateTracker = NewRateTracker()
	mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
	mon.guestMetadataLimiter = make(map[string]time.Time)
	mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
	mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
	mon.guestAgentWorkSlots = make(chan struct{}, 2)

	if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
		t.Fatal("pollVMsAndContainersEfficient() returned false")
	}

	state := mon.state.GetSnapshot()
	if len(state.VMs) != 1 {
		t.Fatalf("expected 1 VM, got %d", len(state.VMs))
	}

	vm := state.VMs[0]
	if vm.DiskStatusReason != "" {
		t.Fatalf("expected empty disk status reason, got %q", vm.DiskStatusReason)
	}
	// Only the C: volume should survive filtering.
	if len(vm.Disks) != 1 {
		t.Fatalf("expected 1 usable Windows disk, got %#v", vm.Disks)
	}
	if vm.Disks[0].Mountpoint != "C:" {
		t.Fatalf("expected normalized Windows drive root to be preserved, got %q", vm.Disks[0].Mountpoint)
	}
	if vm.Disk.Usage <= 0 {
		t.Fatalf("expected Windows guest disk usage to be populated, got %.2f", vm.Disk.Usage)
	}
}
|
|
|
|
func TestPollVMsAndContainersEfficientStabilizesSuspiciousRepeatedLowTrustMemory(t *testing.T) {
|
|
t.Setenv("PULSE_DATA_DIR", t.TempDir())
|
|
|
|
const total = uint64(8 << 30)
|
|
client := &repeatedLowTrustMemoryClusterClient{
|
|
resources: []proxmox.ClusterResource{
|
|
{Type: "qemu", Node: "node1", VMID: 100, Name: "vm100", Status: "running", MaxMem: total, Mem: total, MaxCPU: 4},
|
|
{Type: "qemu", Node: "node1", VMID: 101, Name: "vm101", Status: "running", MaxMem: total, Mem: total, MaxCPU: 4},
|
|
{Type: "qemu", Node: "node1", VMID: 102, Name: "vm102", Status: "running", MaxMem: total, Mem: total, MaxCPU: 4},
|
|
{Type: "qemu", Node: "node1", VMID: 103, Name: "vm103", Status: "running", MaxMem: total, Mem: 2 << 30, MaxCPU: 4},
|
|
},
|
|
vmStatuses: map[int]*proxmox.VMStatus{
|
|
100: {Status: "running", MaxMem: total, Mem: total, Balloon: 2 << 30, Agent: proxmox.VMAgentField{Value: 1}},
|
|
101: {Status: "running", MaxMem: total, Mem: total, Agent: proxmox.VMAgentField{Value: 1}},
|
|
102: {Status: "running", MaxMem: total, Mem: total, Agent: proxmox.VMAgentField{Value: 1}},
|
|
103: {Status: "running", MaxMem: total, Mem: 2 << 30, Agent: proxmox.VMAgentField{Value: 0}},
|
|
},
|
|
}
|
|
|
|
mon := newTestPVEMonitor("pve1")
|
|
defer mon.alertManager.Stop()
|
|
defer mon.notificationMgr.Stop()
|
|
|
|
mon.rateTracker = NewRateTracker()
|
|
mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
|
|
mon.guestMetadataLimiter = make(map[string]time.Time)
|
|
mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
|
|
mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
|
|
mon.guestAgentWorkSlots = make(chan struct{}, 4)
|
|
|
|
now := time.Now()
|
|
mon.state.UpdateVMs([]models.VM{
|
|
{
|
|
ID: makeGuestID("pve1", "node1", 100),
|
|
VMID: 100,
|
|
Name: "vm100",
|
|
Node: "node1",
|
|
Instance: "pve1",
|
|
Type: "qemu",
|
|
Status: "running",
|
|
MemorySource: "rrd-memavailable",
|
|
Memory: models.Memory{Total: int64(total), Used: 3 << 30, Free: 5 << 30, Usage: safePercentage(float64(3<<30), float64(total))},
|
|
LastSeen: now,
|
|
},
|
|
{
|
|
ID: makeGuestID("pve1", "node1", 101),
|
|
VMID: 101,
|
|
Name: "vm101",
|
|
Node: "node1",
|
|
Instance: "pve1",
|
|
Type: "qemu",
|
|
Status: "running",
|
|
MemorySource: "guest-agent-meminfo",
|
|
Memory: models.Memory{Total: int64(total), Used: 4 << 30, Free: 4 << 30, Usage: 50},
|
|
LastSeen: now,
|
|
},
|
|
{
|
|
ID: makeGuestID("pve1", "node1", 102),
|
|
VMID: 102,
|
|
Name: "vm102",
|
|
Node: "node1",
|
|
Instance: "pve1",
|
|
Type: "qemu",
|
|
Status: "running",
|
|
MemorySource: "previous-snapshot",
|
|
Memory: models.Memory{Total: int64(total), Used: 5 << 30, Free: 3 << 30, Usage: 62.5},
|
|
LastSeen: now,
|
|
},
|
|
})
|
|
|
|
if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
|
|
t.Fatal("pollVMsAndContainersEfficient() returned false")
|
|
}
|
|
|
|
state := mon.state.GetSnapshot()
|
|
if len(state.VMs) != 4 {
|
|
t.Fatalf("expected 4 VMs, got %d", len(state.VMs))
|
|
}
|
|
|
|
vmByID := make(map[int]models.VM, len(state.VMs))
|
|
for _, vm := range state.VMs {
|
|
vmByID[vm.VMID] = vm
|
|
}
|
|
|
|
if vmByID[100].MemorySource != "previous-snapshot" || vmByID[100].Memory.Used != 3<<30 {
|
|
t.Fatalf("vm100 memory = %#v source=%q, want preserved previous reading", vmByID[100].Memory, vmByID[100].MemorySource)
|
|
}
|
|
if vmByID[100].Memory.Balloon != 2<<30 {
|
|
t.Fatalf("vm100 balloon = %d, want current balloon", vmByID[100].Memory.Balloon)
|
|
}
|
|
if vmByID[101].MemorySource != "previous-snapshot" || vmByID[101].Memory.Used != 4<<30 {
|
|
t.Fatalf("vm101 memory = %#v source=%q, want preserved previous reading", vmByID[101].Memory, vmByID[101].MemorySource)
|
|
}
|
|
if vmByID[102].MemorySource != "previous-snapshot" || vmByID[102].Memory.Used != 5<<30 {
|
|
t.Fatalf("vm102 memory = %#v source=%q, want chained preserved reading", vmByID[102].Memory, vmByID[102].MemorySource)
|
|
}
|
|
if vmByID[103].MemorySource != "status-mem" || vmByID[103].Memory.Used != 2<<30 {
|
|
t.Fatalf("vm103 memory = %#v source=%q, want unaffected current reading", vmByID[103].Memory, vmByID[103].MemorySource)
|
|
}
|
|
|
|
snapshotKey := makeGuestSnapshotKey("pve1", "qemu", "node1", 100)
|
|
mon.diagMu.RLock()
|
|
snapshot, ok := mon.guestSnapshots[snapshotKey]
|
|
stabilizedSnapshot := mon.guestSnapshots[makeGuestSnapshotKey("pve1", "qemu", "node1", 102)]
|
|
mon.diagMu.RUnlock()
|
|
if !ok {
|
|
t.Fatal("expected guest snapshot for vm100")
|
|
}
|
|
if snapshot.MemorySource != "previous-snapshot" || snapshot.Memory.Used != 3<<30 {
|
|
t.Fatalf("snapshot memory = %#v source=%q, want preserved previous reading", snapshot.Memory, snapshot.MemorySource)
|
|
}
|
|
if !snapshotHasNote(stabilizedSnapshot.Notes, "preserved-previous-memory-after-repeated-low-trust-pattern") &&
|
|
!snapshotHasNote(stabilizedSnapshot.Notes, "preserved-previous-memory-for-healthy-guest-low-trust-full-usage") {
|
|
t.Fatalf("vm102 snapshot notes = %#v, want preservation note", stabilizedSnapshot.Notes)
|
|
}
|
|
}
|
|
|
|
func TestPollVMsAndContainersEfficientTreatsAvailableGuestAgentAsHealthyForMemoryCarryForward(t *testing.T) {
|
|
t.Setenv("PULSE_DATA_DIR", t.TempDir())
|
|
|
|
const total = uint64(8 << 30)
|
|
client := &repeatedLowTrustMemoryClusterClient{
|
|
resources: []proxmox.ClusterResource{
|
|
{Type: "qemu", Node: "node1", VMID: 100, Name: "vm100", Status: "running", MaxMem: total, Mem: total, MaxCPU: 4},
|
|
},
|
|
vmStatuses: map[int]*proxmox.VMStatus{
|
|
100: {Status: "running", MaxMem: total, Mem: total, Agent: proxmox.VMAgentField{Value: 1}},
|
|
},
|
|
}
|
|
|
|
mon := newTestPVEMonitor("pve1")
|
|
defer mon.alertManager.Stop()
|
|
defer mon.notificationMgr.Stop()
|
|
|
|
mon.rateTracker = NewRateTracker()
|
|
mon.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
|
|
mon.guestMetadataLimiter = make(map[string]time.Time)
|
|
mon.vmRRDMemCache = make(map[string]rrdMemCacheEntry)
|
|
mon.vmAgentMemCache = make(map[string]agentMemCacheEntry)
|
|
mon.guestAgentWorkSlots = make(chan struct{}, 4)
|
|
|
|
now := time.Now()
|
|
mon.state.UpdateVMs([]models.VM{
|
|
{
|
|
ID: makeGuestID("pve1", "node1", 100),
|
|
VMID: 100,
|
|
Name: "vm100",
|
|
Node: "node1",
|
|
Instance: "pve1",
|
|
Type: "qemu",
|
|
Status: "running",
|
|
MemorySource: "previous-snapshot",
|
|
Memory: models.Memory{Total: int64(total), Used: 3 << 30, Free: 5 << 30, Usage: safePercentage(float64(3<<30), float64(total))},
|
|
LastSeen: now,
|
|
},
|
|
})
|
|
|
|
if ok := mon.pollVMsAndContainersEfficient(context.Background(), "pve1", "", false, client, map[string]string{"node1": "online"}); !ok {
|
|
t.Fatal("pollVMsAndContainersEfficient() returned false")
|
|
}
|
|
|
|
state := mon.state.GetSnapshot()
|
|
if len(state.VMs) != 1 {
|
|
t.Fatalf("expected 1 VM, got %d", len(state.VMs))
|
|
}
|
|
|
|
vm := state.VMs[0]
|
|
if vm.MemorySource != "previous-snapshot" || vm.Memory.Used != 3<<30 {
|
|
t.Fatalf("vm memory = %#v source=%q, want preserved previous reading", vm.Memory, vm.MemorySource)
|
|
}
|
|
}
|