Pulse/internal/monitoring/monitor_storage_coverage_test.go
rcourtman 0ae2806f18 fix(memory): add guest agent /proc/meminfo fallback to avoid VM memory inflation (#1270)
Proxmox status.Mem includes page cache as "used" memory, inflating
reported VM usage. The existing fallbacks (balloon meminfo, RRD, linked
host agent) were frequently unavailable, causing most VMs to fall
through to the inflated status-mem source.

Adds a new last-resort fallback that reads /proc/meminfo via the QEMU
guest agent file-read endpoint to get accurate MemAvailable. Results
are cached (60s positive, 5min negative backoff for unsupported VMs).

Also fixes: RRD memavailable fallback missing from traditional polling
path, cache key collisions in multi-PVE setups, FreeMem underflow
guard inconsistency, and integer overflow in kB-to-bytes conversion.
2026-02-20 13:31:52 +00:00
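
The commit message describes reading /proc/meminfo through the guest agent file-read endpoint, extracting MemAvailable, and converting the kB value to bytes without overflowing. The sketch below is not Pulse's implementation: parseMemAvailable and the sample input are hypothetical, and the actual agent file-read call, 60s/5min caching, and backoff are omitted; it only illustrates the parsing step and the overflow-guarded kB-to-bytes conversion the message refers to.

package main

import (
	"bufio"
	"fmt"
	"math"
	"strconv"
	"strings"
)

// parseMemAvailable extracts the MemAvailable value (reported in kB) from
// /proc/meminfo content and returns it in bytes.
func parseMemAvailable(meminfo string) (uint64, error) {
	scanner := bufio.NewScanner(strings.NewReader(meminfo))
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "MemAvailable:") {
			continue
		}
		fields := strings.Fields(line) // e.g. ["MemAvailable:", "8123456", "kB"]
		if len(fields) < 2 {
			return 0, fmt.Errorf("malformed MemAvailable line: %q", line)
		}
		kb, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			return 0, fmt.Errorf("parse MemAvailable: %w", err)
		}
		// Guard the kB-to-bytes multiplication against uint64 overflow.
		if kb > math.MaxUint64/1024 {
			return 0, fmt.Errorf("MemAvailable out of range: %d kB", kb)
		}
		return kb * 1024, nil
	}
	return 0, fmt.Errorf("MemAvailable not found in /proc/meminfo")
}

func main() {
	sample := "MemTotal:       16384000 kB\nMemAvailable:    8123456 kB\n"
	avail, err := parseMemAvailable(sample)
	if err != nil {
		panic(err)
	}
	fmt.Println("MemAvailable bytes:", avail)
}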


package monitoring

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/rcourtman/pulse-go-rewrite/internal/models"
	"github.com/rcourtman/pulse-go-rewrite/pkg/proxmox"
)
// mockPVEClientForStorage simulates storage query failures, timeouts, and
// per-storage content retrieval errors for the PVE client.
type mockPVEClientForStorage struct {
	mockPVEClientExtra // embed existing mock for the remaining interface methods

	ShouldFailStorageQuery bool
	ShouldTimeoutStorage   bool
	StorageToFail          map[string]bool // storage names whose content retrieval fails
	Storages               []proxmox.Storage
}

func (m *mockPVEClientForStorage) GetStorage(ctx context.Context, node string) ([]proxmox.Storage, error) {
	if m.ShouldFailStorageQuery {
		return nil, fmt.Errorf("failed to get storage")
	}
	if m.ShouldTimeoutStorage {
		return nil, fmt.Errorf("timeout doing request")
	}
	return m.Storages, nil
}

// GetVMMemAvailableFromAgent is stubbed; the memory fallback path is not
// exercised by this test.
func (m *mockPVEClientForStorage) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
	return 0, fmt.Errorf("not implemented")
}

func (m *mockPVEClientForStorage) GetStorageContent(ctx context.Context, node, storage string) ([]proxmox.StorageContent, error) {
	if m.StorageToFail != nil && m.StorageToFail[storage] {
		return nil, fmt.Errorf("failed to get content")
	}
	// Return some dummy backup content for the storage.
	return []proxmox.StorageContent{
		{Volid: fmt.Sprintf("backup/vzdump-qemu-100-%s.vma.zst", time.Now().Format("2006_01_02-15_04_05")), Size: 1024, CTime: time.Now().Unix()},
	}, nil
}
func TestMonitor_PollStorageBackupsWithNodes_Coverage(t *testing.T) {
	// Setup
	m := &Monitor{
		state: models.NewState(),
	}

	// Seed state with a VM so the guest lookup logic has something to resolve.
	vms := []models.VM{
		{VMID: 100, Node: "node1", Instance: "pve1", Name: "vm100"},
	}
	m.state.UpdateVMsForInstance("pve1", vms)

	nodes := []proxmox.Node{
		{Node: "node1", Status: "online"},
		{Node: "node2", Status: "offline"}, // exercises the offline-node path
	}
	nodeEffectiveStatus := map[string]string{
		"node1": "online",
		"node2": "offline",
	}

	storages := []proxmox.Storage{
		{Storage: "local", Content: "backup", Type: "dir", Enabled: 1, Active: 1, Shared: 0},
		{Storage: "shared", Content: "backup", Type: "nfs", Enabled: 1, Active: 1, Shared: 1},
		{Storage: "broken", Content: "backup", Type: "dir", Enabled: 1, Active: 1, Shared: 0},
	}
	client := &mockPVEClientForStorage{
		Storages:      storages,
		StorageToFail: map[string]bool{"broken": true},
	}

	// Execute
	ctx := context.Background()
	m.pollStorageBackupsWithNodes(ctx, "pve1", client, nodes, nodeEffectiveStatus)

	// Verify state
	snapshot := m.state.GetSnapshot()
	if len(snapshot.PVEBackups.StorageBackups) == 0 {
		t.Error("Expected backups to be found")
	}

	// Offline-node preservation: a storage previously known for node2 (offline)
	// would be preserved if not shared, but no prior backups were seeded for
	// node2, so there is nothing to assert here.

	// Timeout path: the poll should log a warning and retry; the mock returns a
	// timeout again, so the retry also fails.
	client.ShouldTimeoutStorage = true
	m.pollStorageBackupsWithNodes(ctx, "pve1", client, nodes, nodeEffectiveStatus)
}