Mirror of https://github.com/rcourtman/Pulse.git (synced 2026-04-28 03:20:11 +00:00)
fix(memory): add guest agent /proc/meminfo fallback to avoid VM memory inflation (#1270)
Proxmox status.Mem includes page cache as "used" memory, inflating reported VM usage. The existing fallbacks (balloon meminfo, RRD, linked host agent) were frequently unavailable, causing most VMs to fall through to the inflated status-mem source.

Adds a new last-resort fallback that reads /proc/meminfo via the QEMU guest agent file-read endpoint to get accurate MemAvailable. Results are cached (60s positive, 5min negative backoff for unsupported VMs).

Also fixes:
- RRD memavailable fallback missing from the traditional polling path
- cache key collisions in multi-PVE setups
- FreeMem underflow guard inconsistency
- integer overflow in the kB-to-bytes conversion
parent: 8c7d507ea4
commit: 0ae2806f18
12 changed files with 237 additions and 9 deletions
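To make the inflation concrete before the diff, here is a minimal, self-contained Go sketch (illustrative numbers only, not code from this commit) contrasting usage derived from status.Mem with usage derived from a guest-reported MemAvailable:

// Hypothetical figures for a 16 GiB VM; real values come from the Proxmox API
// and the guest agent, respectively.
package main

import "fmt"

func main() {
	var (
		memTotal     uint64 = 16 << 30 // RAM assigned to the VM
		statusMem    uint64 = 14 << 30 // Proxmox status.Mem ("used", includes page cache)
		memAvailable uint64 = 10 << 30 // MemAvailable read from /proc/meminfo via the agent
	)

	inflated := statusMem               // usage as reported by status.Mem
	accurate := memTotal - memAvailable // usage excluding reclaimable cache (real code guards underflow)

	fmt.Printf("status-mem usage:      %.1f%%\n", 100*float64(inflated)/float64(memTotal)) // 87.5%
	fmt.Printf("meminfo-derived usage: %.1f%%\n", 100*float64(accurate)/float64(memTotal)) // 37.5%
}

The same guest reads as roughly 88% used from status.Mem but only about 38% once reclaimable cache is excluded; that gap is what the new guest-agent fallback closes.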
@@ -33,6 +33,10 @@ func (m *mockCephPVEClient) GetCephDF(ctx context.Context) (*proxmox.CephDF, err
 	return args.Get(0).(*proxmox.CephDF), args.Error(1)
 }
 
+func (m *mockCephPVEClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func TestPollCephCluster(t *testing.T) {
 	t.Run("clears state when ceph not detected", func(t *testing.T) {
 		m := &Monitor{state: models.NewState()}
@@ -79,6 +79,7 @@ type PVEClientInterface interface {
 	IsClusterMember(ctx context.Context) (bool, error)
 	GetVMFSInfo(ctx context.Context, node string, vmid int) ([]proxmox.VMFileSystem, error)
 	GetVMNetworkInterfaces(ctx context.Context, node string, vmid int) ([]proxmox.VMNetworkInterface, error)
+	GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error)
 	GetVMAgentInfo(ctx context.Context, node string, vmid int) (map[string]interface{}, error)
 	GetVMAgentVersion(ctx context.Context, node string, vmid int) (string, error)
 	GetZFSPoolStatus(ctx context.Context, node string) ([]proxmox.ZFSPoolStatus, error)
@@ -803,11 +804,12 @@ type Monitor struct {
 	rrdCacheMu      sync.RWMutex // Protects RRD memavailable cache
 	nodeRRDMemCache map[string]rrdMemCacheEntry
 	vmRRDMemCache   map[string]rrdMemCacheEntry
-	removedDockerHosts        map[string]time.Time // Track deliberately removed Docker hosts (ID -> removal time)
-	dockerTokenBindings       map[string]string    // Track token ID -> agent ID bindings to enforce uniqueness
-	removedKubernetesClusters map[string]time.Time // Track deliberately removed Kubernetes clusters (ID -> removal time)
-	kubernetesTokenBindings   map[string]string    // Track token ID -> agent ID bindings to enforce uniqueness
-	hostTokenBindings         map[string]string    // Track tokenID:hostname -> host identity bindings
+	vmAgentMemCache map[string]agentMemCacheEntry // Guest agent /proc/meminfo cache
+	removedDockerHosts        map[string]time.Time // Track deliberately removed Docker hosts (ID -> removal time)
+	dockerTokenBindings       map[string]string    // Track token ID -> agent ID bindings to enforce uniqueness
+	removedKubernetesClusters map[string]time.Time // Track deliberately removed Kubernetes clusters (ID -> removal time)
+	kubernetesTokenBindings   map[string]string    // Track token ID -> agent ID bindings to enforce uniqueness
+	hostTokenBindings         map[string]string    // Track tokenID:hostname -> host identity bindings
 	dockerCommands     map[string]*dockerHostCommand
 	dockerCommandIndex map[string]string
 	guestMetadataMu sync.RWMutex
@@ -850,6 +852,14 @@ type rrdMemCacheEntry struct {
 	fetchedAt time.Time
 }
 
+// agentMemCacheEntry caches MemAvailable read via guest agent file-read of /proc/meminfo.
+// A zero available with negative=true means the VM doesn't support this (e.g. Windows, agent off).
+type agentMemCacheEntry struct {
+	available uint64
+	negative  bool // true = read failed, don't retry until TTL expires
+	fetchedAt time.Time
+}
+
 // pendingUpdatesCache caches apt pending updates count per node
 type pendingUpdatesCache struct {
 	count int
@@ -1154,6 +1164,9 @@ const (
 	nodeOfflineGracePeriod = 60 * time.Second // Grace period before marking Proxmox nodes offline
 	nodeRRDCacheTTL        = 30 * time.Second
 	nodeRRDRequestTimeout  = 2 * time.Second
+	vmAgentMemCacheTTL       = 60 * time.Second // Cache guest-agent /proc/meminfo reads
+	vmAgentMemRequestTimeout = 3 * time.Second  // Timeout for guest-agent file-read calls
+	vmAgentMemNegativeTTL    = 5 * time.Minute  // Backoff for VMs where guest-agent read fails
 )
 
 type taskOutcome struct {
@@ -1239,12 +1252,12 @@ func (m *Monitor) getNodeRRDMetrics(ctx context.Context, client PVEClientInterfa
 // getVMRRDMetrics fetches Proxmox RRD memavailable for a single VM with a
 // short-lived cache to avoid a live API call on every poll for VMs that
 // consistently lack guest-agent memory data (e.g. Windows VMs).
-func (m *Monitor) getVMRRDMetrics(ctx context.Context, client PVEClientInterface, node string, vmid int) (uint64, error) {
+func (m *Monitor) getVMRRDMetrics(ctx context.Context, client PVEClientInterface, instance, node string, vmid int) (uint64, error) {
 	if client == nil || node == "" || vmid <= 0 {
 		return 0, fmt.Errorf("invalid arguments for VM RRD lookup")
 	}
 
-	cacheKey := fmt.Sprintf("%s/%d", node, vmid)
+	cacheKey := fmt.Sprintf("%s/%s/%d", instance, node, vmid)
 	now := time.Now()
 
 	m.rrdCacheMu.RLock()
@@ -1285,6 +1298,52 @@ func (m *Monitor) getVMRRDMetrics(ctx context.Context, client PVEClientInterface
 	return memAvailable, nil
 }
 
+// getVMAgentMemAvailable reads MemAvailable via the QEMU guest agent file-read
+// endpoint (/proc/meminfo). Results are cached; failed reads use a longer
+// negative-cache TTL to avoid hammering VMs that don't support it.
+func (m *Monitor) getVMAgentMemAvailable(ctx context.Context, client PVEClientInterface, instance, node string, vmid int) (uint64, error) {
+	if client == nil || node == "" || vmid <= 0 {
+		return 0, fmt.Errorf("invalid arguments for guest agent mem lookup")
+	}
+
+	cacheKey := fmt.Sprintf("%s/%s/%d", instance, node, vmid)
+	now := time.Now()
+
+	m.rrdCacheMu.RLock()
+	if entry, ok := m.vmAgentMemCache[cacheKey]; ok {
+		ttl := vmAgentMemCacheTTL
+		if entry.negative {
+			ttl = vmAgentMemNegativeTTL
+		}
+		if now.Sub(entry.fetchedAt) < ttl {
+			m.rrdCacheMu.RUnlock()
+			if entry.negative {
+				return 0, fmt.Errorf("guest agent mem read previously failed (negative cache)")
+			}
+			return entry.available, nil
+		}
+	}
+	m.rrdCacheMu.RUnlock()
+
+	requestCtx, cancel := context.WithTimeout(ctx, vmAgentMemRequestTimeout)
+	defer cancel()
+
+	available, err := client.GetVMMemAvailableFromAgent(requestCtx, node, vmid)
+	if err != nil {
+		// Negative cache: don't retry for a while
+		m.rrdCacheMu.Lock()
+		m.vmAgentMemCache[cacheKey] = agentMemCacheEntry{negative: true, fetchedAt: now}
+		m.rrdCacheMu.Unlock()
+		return 0, err
+	}
+
+	m.rrdCacheMu.Lock()
+	m.vmAgentMemCache[cacheKey] = agentMemCacheEntry{available: available, fetchedAt: now}
+	m.rrdCacheMu.Unlock()
+
+	return available, nil
+}
+
 // RemoveDockerHost removes a docker host from the shared state and clears related alerts.
 func (m *Monitor) RemoveDockerHost(hostID string) (models.DockerHost, error) {
 	hostID = strings.TrimSpace(hostID)
@@ -3149,6 +3208,14 @@ func (m *Monitor) cleanupRRDCache(now time.Time) {
 			delete(m.vmRRDMemCache, key)
 		}
 	}
+
+	// Clean up guest-agent memory cache (use longer TTL for negative entries)
+	agentMaxAge := 2 * vmAgentMemNegativeTTL
+	for key, entry := range m.vmAgentMemCache {
+		if now.Sub(entry.fetchedAt) > agentMaxAge {
+			delete(m.vmAgentMemCache, key)
+		}
+	}
 }
 
 // cleanupMetricsHistory removes stale entries from the metrics history.
@@ -3803,6 +3870,7 @@ func New(cfg *config.Config) (*Monitor, error) {
 		guestSnapshots:            make(map[string]GuestMemorySnapshot),
 		nodeRRDMemCache:           make(map[string]rrdMemCacheEntry),
 		vmRRDMemCache:             make(map[string]rrdMemCacheEntry),
+		vmAgentMemCache:           make(map[string]agentMemCacheEntry),
 		removedDockerHosts:        make(map[string]time.Time),
 		dockerTokenBindings:       make(map[string]string),
 		removedKubernetesClusters: make(map[string]time.Time),
@@ -6872,7 +6940,7 @@ func (m *Monitor) pollVMsAndContainersEfficient(ctx context.Context, instanceNam
 			// try Proxmox RRD's memavailable (cache-aware) before falling back to status.Mem
 			// which can include reclaimable page cache (inflating usage). Refs: #1270
 			if memAvailable == 0 {
-				if rrdAvailable, rrdErr := m.getVMRRDMetrics(ctx, client, res.Node, res.VMID); rrdErr == nil && rrdAvailable > 0 {
+				if rrdAvailable, rrdErr := m.getVMRRDMetrics(ctx, client, instanceName, res.Node, res.VMID); rrdErr == nil && rrdAvailable > 0 {
 					memAvailable = rrdAvailable
 					memorySource = "rrd-memavailable"
 					guestRaw.MemInfoAvailable = memAvailable
@@ -6917,6 +6985,25 @@ func (m *Monitor) pollVMsAndContainersEfficient(ctx context.Context, instanceNam
 				}
 			}
 
+			// Last-resort fallback before status-mem: read /proc/meminfo via the
+			// QEMU guest agent's file-read endpoint. This works for Linux VMs with
+			// the guest agent running even when the balloon driver doesn't populate
+			// the meminfo fields. Results are cached with negative backoff. Refs: #1270
+			if memAvailable == 0 && detailedStatus.Agent.Value > 0 {
+				if agentAvail, agentErr := m.getVMAgentMemAvailable(ctx, client, instanceName, res.Node, res.VMID); agentErr == nil && agentAvail > 0 {
+					memAvailable = agentAvail
+					memorySource = "guest-agent-meminfo"
+					guestRaw.MemInfoAvailable = memAvailable
+					log.Debug().
+						Str("vm", res.Name).
+						Str("node", res.Node).
+						Int("vmid", res.VMID).
+						Uint64("total", memTotal).
+						Uint64("available", memAvailable).
+						Msg("QEMU memory: using guest agent /proc/meminfo fallback (excludes reclaimable cache)")
+				}
+			}
+
 			switch {
 			case memAvailable > 0:
 				if memAvailable > memTotal {
@@ -295,6 +295,10 @@ func (m *mockPVEClientExtra) GetVMAgentVersion(ctx context.Context, node string,
 	return "1.0", nil
 }
 
+func (m *mockPVEClientExtra) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func (m *mockPVEClientExtra) GetLXCRRDData(ctx context.Context, node string, vmid int, timeframe string, cf string, ds []string) ([]proxmox.GuestRRDPoint, error) {
 	return nil, nil
 }
@@ -345,6 +349,9 @@ func TestMonitor_PollVMsAndContainersEfficient_Extra(t *testing.T) {
 		metricsHistory:   NewMetricsHistory(100, time.Hour),
 		alertManager:     alerts.NewManager(),
 		stalenessTracker: NewStalenessTracker(nil),
+		nodeRRDMemCache:  make(map[string]rrdMemCacheEntry),
+		vmRRDMemCache:    make(map[string]rrdMemCacheEntry),
+		vmAgentMemCache:  make(map[string]agentMemCacheEntry),
 	}
 	defer m.alertManager.Stop()
 
@@ -505,6 +512,10 @@ func (m *mockPVEClientStorage) GetStorageContent(ctx context.Context, node, stor
 	return m.content, nil
 }
 
+func (m *mockPVEClientStorage) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func TestMonitor_RetryPVEPortFallback_Extra(t *testing.T) {
 	m := &Monitor{
 		config: &config.Config{},
@@ -865,6 +876,10 @@ func (m *mockPVEClientFailNodes) GetNodes(ctx context.Context) ([]proxmox.Node,
 	return nil, fmt.Errorf("nodes failed")
 }
 
+func (m *mockPVEClientFailNodes) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 type mockExecutor struct {
 	executed []PollTask
 }
@@ -27,6 +27,10 @@ type mockPVEClient struct {
 
 func (m *mockPVEClient) GetNodes(ctx context.Context) ([]proxmox.Node, error) { return nil, nil }
 
+func (m *mockPVEClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func TestMonitor_GetConnectionStatuses(t *testing.T) {
 	// Real Mode
 	m := &Monitor{
@@ -698,6 +702,10 @@ func (m *mockPVEClientExtended) GetVMAgentVersion(ctx context.Context, node stri
 	return "", nil
 }
 
+func (m *mockPVEClientExtended) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func (m *mockPVEClientExtended) GetZFSPoolStatus(ctx context.Context, node string) ([]proxmox.ZFSPoolStatus, error) {
 	return nil, nil
 }
@@ -2,6 +2,7 @@ package monitoring
 
 import (
 	"context"
+	"fmt"
 	"math"
 	"testing"
 	"time"
@@ -116,6 +117,10 @@ func (s *stubPVEClient) GetVMAgentVersion(ctx context.Context, node string, vmid
 	return "", nil
 }
 
+func (s *stubPVEClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func (s *stubPVEClient) GetZFSPoolStatus(ctx context.Context, node string) ([]proxmox.ZFSPoolStatus, error) {
 	return nil, nil
 }
@@ -371,6 +371,22 @@ func (m *Monitor) pollVMsWithNodes(ctx context.Context, instanceName string, clu
 			// and makes the frontend's balloon marker logic ineffective.
 			// Refs: #1070
 
+			// Fallback: try RRD memavailable (cached). Refs: #1270
+			if memAvailable == 0 {
+				if rrdAvailable, rrdErr := m.getVMRRDMetrics(ctx, client, instanceName, n.Node, vm.VMID); rrdErr == nil && rrdAvailable > 0 {
+					memAvailable = rrdAvailable
+					memorySource = "rrd-memavailable"
+					guestRaw.MemInfoAvailable = memAvailable
+					log.Debug().
+						Str("vm", vm.Name).
+						Str("node", n.Node).
+						Int("vmid", vm.VMID).
+						Uint64("total", memTotal).
+						Uint64("available", memAvailable).
+						Msg("QEMU memory: using RRD memavailable fallback (excludes reclaimable cache)")
+				}
+			}
+
 			// Fallback: use linked Pulse host agent's memory data.
 			// gopsutil's Used = Total - Available (excludes page cache),
 			// so we can derive accurate available memory. Refs: #1270
@@ -395,6 +411,23 @@ func (m *Monitor) pollVMsWithNodes(ctx context.Context, instanceName string, clu
 				}
 			}
 
+			// Last-resort fallback before status-mem: read /proc/meminfo via the
+			// QEMU guest agent file-read endpoint. Refs: #1270
+			if memAvailable == 0 && status.Agent.Value > 0 {
+				if agentAvail, agentErr := m.getVMAgentMemAvailable(ctx, client, instanceName, n.Node, vm.VMID); agentErr == nil && agentAvail > 0 {
+					memAvailable = agentAvail
+					memorySource = "guest-agent-meminfo"
+					guestRaw.MemInfoAvailable = memAvailable
+					log.Debug().
+						Str("vm", vm.Name).
+						Str("node", n.Node).
+						Int("vmid", vm.VMID).
+						Uint64("total", memTotal).
+						Uint64("available", memAvailable).
+						Msg("QEMU memory: using guest agent /proc/meminfo fallback (excludes reclaimable cache)")
+				}
+			}
+
 			switch {
 			case memAvailable > 0:
 				if memAvailable > memTotal {
@@ -411,7 +444,7 @@ func (m *Monitor) pollVMsWithNodes(ctx context.Context, instanceName string, clu
 				// Refs: #1185
 				memUsed = vmStatus.Mem
 				memorySource = "status-mem"
-			case vmStatus.FreeMem > 0:
+			case vmStatus.FreeMem > 0 && memTotal >= vmStatus.FreeMem:
 				memUsed = memTotal - vmStatus.FreeMem
 				memorySource = "status-freemem"
 			default:
@@ -27,6 +27,10 @@ func (m *mockPVEClientSnapshots) GetContainerSnapshots(ctx context.Context, node
 	return m.snapshots, nil
 }
 
+func (m *mockPVEClientSnapshots) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func TestMonitor_PollGuestSnapshots_Coverage(t *testing.T) {
 	m := &Monitor{
 		state: models.NewState(),
@@ -2,6 +2,7 @@ package monitoring
 
 import (
 	"context"
+	"fmt"
 	"testing"
 
 	"github.com/rcourtman/pulse-go-rewrite/internal/models"
@@ -86,6 +87,9 @@ func (f fakeSnapshotClient) GetVMAgentInfo(ctx context.Context, node string, vmi
 func (f fakeSnapshotClient) GetVMAgentVersion(ctx context.Context, node string, vmid int) (string, error) {
 	return "", nil
 }
+func (f fakeSnapshotClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
 func (f fakeSnapshotClient) GetZFSPoolStatus(ctx context.Context, node string) ([]proxmox.ZFSPoolStatus, error) {
 	return nil, nil
 }
@@ -30,6 +30,10 @@ func (m *mockPVEClientForStorage) GetStorage(ctx context.Context, node string) (
 	return m.Storages, nil
 }
 
+func (m *mockPVEClientForStorage) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func (m *mockPVEClientForStorage) GetStorageContent(ctx context.Context, node, storage string) ([]proxmox.StorageContent, error) {
 	if m.StorageToFail != nil && m.StorageToFail[storage] {
 		return nil, fmt.Errorf("failed to get content")
@@ -116,6 +116,10 @@ func (f *fakeStorageClient) GetVMAgentVersion(ctx context.Context, node string,
 	return "", nil
 }
 
+func (f *fakeStorageClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	return 0, fmt.Errorf("not implemented")
+}
+
 func (f *fakeStorageClient) GetZFSPoolStatus(ctx context.Context, node string) ([]proxmox.ZFSPoolStatus, error) {
 	return nil, nil
 }
@@ -1655,6 +1655,52 @@ func (c *Client) GetVMNetworkInterfaces(ctx context.Context, node string, vmid i
 	return result.Data.Result, nil
 }
 
+// GetVMMemAvailableFromAgent reads /proc/meminfo via the QEMU guest agent's
+// file-read endpoint and returns MemAvailable in bytes. This is a fallback for
+// VMs where the balloon driver does not populate the meminfo field in the
+// status endpoint. Returns 0 if the guest agent is unavailable, the file
+// cannot be read, or MemAvailable is not present (e.g. Windows VMs).
+func (c *Client) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	fileParam := url.QueryEscape("/proc/meminfo")
+	resp, err := c.get(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/agent/file-read?file=%s", node, vmid, fileParam))
+	if err != nil {
+		return 0, fmt.Errorf("guest agent file-read /proc/meminfo: %w", err)
+	}
+	defer resp.Body.Close()
+
+	var result struct {
+		Data struct {
+			Content   string `json:"content"`
+			Truncated *bool  `json:"truncated,omitempty"`
+		} `json:"data"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return 0, fmt.Errorf("decode file-read response: %w", err)
+	}
+
+	// Parse MemAvailable from /proc/meminfo (format: "MemAvailable: 12345 kB")
+	for _, line := range strings.Split(result.Data.Content, "\n") {
+		line = strings.TrimSpace(line)
+		if !strings.HasPrefix(line, "MemAvailable:") {
+			continue
+		}
+		fields := strings.Fields(line)
+		if len(fields) < 2 {
+			continue
+		}
+		kB, err := strconv.ParseUint(fields[1], 10, 64)
+		if err != nil {
+			return 0, fmt.Errorf("parse MemAvailable value %q: %w", fields[1], err)
+		}
+		if kB > math.MaxUint64/1024 {
+			return 0, fmt.Errorf("MemAvailable value %d kB overflows uint64", kB)
+		}
+		return kB * 1024, nil // Convert kB to bytes
+	}
+
+	return 0, fmt.Errorf("MemAvailable not found in /proc/meminfo")
+}
+
 // GetVMStatus returns detailed VM status including balloon info
 func (c *Client) GetVMStatus(ctx context.Context, node string, vmid int) (*VMStatus, error) {
 	// Note: Proxmox 9.x removed support for the "full" parameter
@@ -1245,6 +1245,20 @@ func (cc *ClusterClient) GetVMNetworkInterfaces(ctx context.Context, node string
 	return result, err
 }
 
+// GetVMMemAvailableFromAgent reads /proc/meminfo via the QEMU guest agent to get MemAvailable.
+func (cc *ClusterClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
+	var result uint64
+	err := cc.executeWithFailover(ctx, func(client *Client) error {
+		available, err := client.GetVMMemAvailableFromAgent(ctx, node, vmid)
+		if err != nil {
+			return err
+		}
+		result = available
+		return nil
+	})
+	return result, err
+}
+
 // GetClusterResources returns all resources (VMs, containers) across the cluster in a single call
 func (cc *ClusterClient) GetClusterResources(ctx context.Context, resourceType string) ([]ClusterResource, error) {
 	var result []ClusterResource