Prefer linked host disk metrics for Proxmox nodes
parent aef1bd41fa
commit fadd3ef2bd

7 changed files with 290 additions and 30 deletions
@@ -191,3 +191,10 @@ legacy aliases such as `rrd-available`, `rrd-data`, `node-status-available`,
 `calculated`, and `listing` must normalize onto the governed canonical labels
 before snapshots are returned to diagnostics consumers, not only when new
 snapshots are first recorded.
+Node disk-source selection now also routes through one canonical resolver
+under `internal/monitoring/`. When a Proxmox node has a linked Pulse host
+agent, the node summary must prefer the linked host's canonical disk view over
+Proxmox `rootfs` bytes because dataset-level `rootfs` can materially
+under-report ZFS-backed node capacity and usage. Proxmox `rootfs` and `/nodes`
+disk values remain fallback sources only when no linked host disk truth is
+available.
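To make the precedence concrete, here is a minimal, self-contained sketch of the three-tier fallback the paragraph above describes. This is illustrative only; the committed resolver in internal/monitoring/node_disk_sources.go below is the authoritative version.

package main

import "fmt"

// pickDiskSource mirrors the documented order: linked host agent first,
// then Proxmox rootfs, then the /nodes aggregate, else nothing.
func pickDiskSource(hasLinkedAgent, hasRootFS, hasNodesDisk bool) string {
	switch {
	case hasLinkedAgent:
		return "agent"
	case hasRootFS:
		return "node-status-rootfs"
	case hasNodesDisk:
		return "nodes-endpoint"
	default:
		return "" // no usable disk truth
	}
}

func main() {
	// A ZFS-backed node with a linked agent never falls through to rootfs.
	fmt.Println(pickDiskSource(true, true, true)) // agent
}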
25 internal/models/disk_summary.go Normal file
@@ -0,0 +1,25 @@
package models

import "strings"

// SummaryDisk selects the canonical summary disk for host-level metrics.
// Prefer the root-mounted filesystem when present; otherwise fall back to the
// first disk with non-zero capacity in the existing collection order.
func SummaryDisk(disks []Disk) (Disk, bool) {
	for _, disk := range disks {
		if disk.Total <= 0 {
			continue
		}
		if strings.TrimSpace(disk.Mountpoint) == "/" {
			return disk, true
		}
	}

	for _, disk := range disks {
		if disk.Total > 0 {
			return disk, true
		}
	}

	return Disk{}, false
}
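A quick usage sketch from a consumer's point of view (hypothetical sizes; the tests below exercise the same behavior): the root mount wins regardless of position, and zero-capacity entries are ignored.

	disks := []models.Disk{
		{Mountpoint: "/boot", Total: 0},  // ignored: zero capacity
		{Mountpoint: "/var", Total: 200}, // fallback only if no root mount exists
		{Mountpoint: "/", Total: 100},    // selected by the first pass
	}
	disk, ok := models.SummaryDisk(disks)
	// ok == true, disk.Mountpoint == "/", disk.Total == 100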
47 internal/models/disk_summary_test.go Normal file
@@ -0,0 +1,47 @@
package models

import "testing"

func TestSummaryDiskPrefersRootMount(t *testing.T) {
	disks := []Disk{
		{Mountpoint: "/boot", Total: 1, Used: 1},
		{Mountpoint: "/", Total: 100, Used: 40, Free: 60, Usage: 40},
		{Mountpoint: "/var", Total: 200, Used: 50},
	}

	got, ok := SummaryDisk(disks)
	if !ok {
		t.Fatal("expected summary disk")
	}
	if got.Mountpoint != "/" {
		t.Fatalf("summary mountpoint = %q, want /", got.Mountpoint)
	}
	if got.Total != 100 || got.Used != 40 {
		t.Fatalf("summary disk = %+v, want root disk", got)
	}
}

func TestSummaryDiskFallsBackToFirstNonZeroDisk(t *testing.T) {
	disks := []Disk{
		{Mountpoint: "", Total: 0},
		{Mountpoint: "/tank", Total: 400, Used: 100, Free: 300, Usage: 25},
		{Mountpoint: "/backup", Total: 800, Used: 200},
	}

	got, ok := SummaryDisk(disks)
	if !ok {
		t.Fatal("expected summary disk")
	}
	if got.Mountpoint != "/tank" {
		t.Fatalf("summary mountpoint = %q, want /tank", got.Mountpoint)
	}
}

func TestSummaryDiskReturnsFalseWhenNoUsableDiskExists(t *testing.T) {
	if _, ok := SummaryDisk(nil); ok {
		t.Fatal("expected no summary disk for nil input")
	}
	if _, ok := SummaryDisk([]Disk{{Mountpoint: "/", Total: 0}}); ok {
		t.Fatal("expected no summary disk for zero-capacity disks")
	}
}
@@ -107,6 +107,23 @@ func TestLegacyMemorySourceAliasesRemainCanonicalized(t *testing.T) {
 	}
 }
 
+func TestProxmoxNodeDiskUsesCanonicalResolver(t *testing.T) {
+	data, err := os.ReadFile("monitor_polling_node.go")
+	if err != nil {
+		t.Fatalf("failed to read monitor_polling_node.go: %v", err)
+	}
+	source := string(data)
+	requiredSnippets := []string{
+		"modelNode.Disk, _ = m.resolveNodeDisk(",
+		"if resolvedDisk, diskSource := m.resolveNodeDisk(",
+	}
+	for _, snippet := range requiredSnippets {
+		if !strings.Contains(source, snippet) {
+			t.Fatalf("monitor_polling_node.go must contain %q", snippet)
+		}
+	}
+}
+
 func TestAlertLifecycleCanonicalChangesRemainWritable(t *testing.T) {
 	store := unifiedresources.NewMemoryStore()
 	incidentStore := memory.NewIncidentStore(memory.IncidentStoreConfig{})
@@ -42,12 +42,6 @@ func (m *Monitor) pollPVENode(
 			Free:  int64(node.MaxMem - node.Mem),
 			Usage: safePercentage(float64(node.Mem), float64(node.MaxMem)),
 		},
-		Disk: models.Disk{
-			Total: int64(node.MaxDisk),
-			Used:  int64(node.Disk),
-			Free:  int64(node.MaxDisk - node.Disk),
-			Usage: safePercentage(float64(node.Disk), float64(node.MaxDisk)),
-		},
 		Uptime:      int64(node.Uptime),
 		LoadAverage: []float64{},
 		LastSeen:    time.Now(),
@@ -56,6 +50,7 @@ func (m *Monitor) pollPVENode(
 		ClusterName:                  instanceCfg.ClusterName,
 		TemperatureMonitoringEnabled: instanceCfg.TemperatureMonitoringEnabled,
 	}
+	modelNode.Disk, _ = m.resolveNodeDisk(instanceName, nodeID, node.Node, node, nil)
 
 	nodeSnapshotRaw := NodeMemoryRaw{
 		Total: node.MaxMem,
@@ -138,31 +133,9 @@ func (m *Monitor) pollPVENode(
 	modelNode.KernelVersion = nodeInfo.KernelVersion
 	modelNode.PVEVersion = nodeInfo.PVEVersion
-
-	// Prefer rootfs data for more accurate disk metrics, but ensure we have valid fallback
-	if nodeInfo.RootFS != nil && nodeInfo.RootFS.Total > 0 {
-		modelNode.Disk = models.Disk{
-			Total: int64(nodeInfo.RootFS.Total),
-			Used:  int64(nodeInfo.RootFS.Used),
-			Free:  int64(nodeInfo.RootFS.Free),
-			Usage: safePercentage(float64(nodeInfo.RootFS.Used), float64(nodeInfo.RootFS.Total)),
-		}
-		log.Debug().
-			Str("node", node.Node).
-			Uint64("rootfsUsed", nodeInfo.RootFS.Used).
-			Uint64("rootfsTotal", nodeInfo.RootFS.Total).
-			Float64("rootfsUsage", modelNode.Disk.Usage).
-			Msg("Using rootfs for disk metrics")
-	} else if node.Disk > 0 && node.MaxDisk > 0 {
-		// RootFS unavailable but we have valid disk data from /nodes endpoint
-		// Keep the values we already set from the nodes list
-		log.Debug().
-			Str("node", node.Node).
-			Bool("rootfsNil", nodeInfo.RootFS == nil).
-			Uint64("fallbackDisk", node.Disk).
-			Uint64("fallbackMaxDisk", node.MaxDisk).
-			Msg("RootFS data unavailable - using /nodes endpoint disk metrics")
+	if resolvedDisk, diskSource := m.resolveNodeDisk(instanceName, nodeID, node.Node, node, nodeInfo); diskSource != "" {
+		modelNode.Disk = resolvedDisk
 	} else {
 		// Neither rootfs nor valid node disk data available
 		log.Warn().
 			Str("node", node.Node).
 			Bool("rootfsNil", nodeInfo.RootFS == nil).
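Reading the three hunks above together: the resolver runs twice per poll. A condensed view follows, with comments that are editorial rather than part of the commit.

	// Early in pollPVENode, before node status is fetched, nodeInfo is nil,
	// so only the linked-agent summary or the /nodes aggregate can resolve.
	modelNode.Disk, _ = m.resolveNodeDisk(instanceName, nodeID, node.Node, node, nil)

	// Once node status arrives, resolve again with nodeInfo so rootfs can
	// participate; the earlier value is kept only when nothing resolves.
	if resolvedDisk, diskSource := m.resolveNodeDisk(instanceName, nodeID, node.Node, node, nodeInfo); diskSource != "" {
		modelNode.Disk = resolvedDisk
	}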
104 internal/monitoring/node_disk_sources.go Normal file
@@ -0,0 +1,104 @@
package monitoring

import (
	"strings"

	"github.com/rcourtman/pulse-go-rewrite/internal/models"
	"github.com/rcourtman/pulse-go-rewrite/pkg/proxmox"
	"github.com/rs/zerolog/log"
)

func (m *Monitor) resolveNodeDisk(
	instanceName string,
	nodeID string,
	nodeName string,
	node proxmox.Node,
	nodeInfo *proxmox.NodeStatus,
) (models.Disk, string) {
	if linkedHost := m.linkedHostForNode(instanceName, nodeID, nodeName); linkedHost != nil {
		if disk, ok := models.SummaryDisk(linkedHost.Disks); ok {
			resolved := models.Disk{
				Total: disk.Total,
				Used:  disk.Used,
				Free:  disk.Free,
				Usage: disk.Usage,
			}
			log.Debug().
				Str("instance", instanceName).
				Str("node", nodeName).
				Str("hostAgentID", linkedHost.ID).
				Int64("total", resolved.Total).
				Int64("used", resolved.Used).
				Float64("usage", resolved.Usage).
				Msg("Node disk: using linked Pulse host agent disk summary")
			return resolved, "agent"
		}
	}

	if nodeInfo != nil && nodeInfo.RootFS != nil && nodeInfo.RootFS.Total > 0 {
		resolved := models.Disk{
			Total: int64(nodeInfo.RootFS.Total),
			Used:  int64(nodeInfo.RootFS.Used),
			Free:  int64(nodeInfo.RootFS.Free),
			Usage: safePercentage(float64(nodeInfo.RootFS.Used), float64(nodeInfo.RootFS.Total)),
		}
		log.Debug().
			Str("instance", instanceName).
			Str("node", nodeName).
			Uint64("rootfsUsed", nodeInfo.RootFS.Used).
			Uint64("rootfsTotal", nodeInfo.RootFS.Total).
			Float64("rootfsUsage", resolved.Usage).
			Msg("Node disk: using Proxmox rootfs metrics")
		return resolved, "node-status-rootfs"
	}

	if node.MaxDisk > 0 {
		resolved := models.Disk{
			Total: int64(node.MaxDisk),
			Used:  int64(node.Disk),
			Free:  int64(node.MaxDisk - node.Disk),
			Usage: safePercentage(float64(node.Disk), float64(node.MaxDisk)),
		}
		log.Debug().
			Str("instance", instanceName).
			Str("node", nodeName).
			Uint64("disk", node.Disk).
			Uint64("maxDisk", node.MaxDisk).
			Float64("usage", resolved.Usage).
			Msg("Node disk: using /nodes endpoint metrics")
		return resolved, "nodes-endpoint"
	}

	return models.Disk{}, ""
}

func (m *Monitor) linkedHostForNode(instanceName, nodeID, nodeName string) *models.Host {
	readState := m.GetUnifiedReadStateOrSnapshot()
	if readState == nil {
		return nil
	}

	linkedAgentID := ""
	for _, existingNode := range nodesForInstanceFromReadState(readState, instanceName) {
		if existingNode.ID == nodeID || strings.EqualFold(existingNode.Name, nodeName) {
			linkedAgentID = strings.TrimSpace(existingNode.LinkedAgentID)
			break
		}
	}
	if linkedAgentID == "" {
		return nil
	}

	for _, host := range hostsFromReadState(readState) {
		if strings.TrimSpace(host.ID) != linkedAgentID {
			continue
		}
		if !strings.EqualFold(strings.TrimSpace(host.Status), "online") {
			return nil
		}
		resolved := host
		return &resolved
	}

	return nil
}
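The second return value is a provenance label. A caller could branch on it like this; the labels come from the function above, but the switch itself is only a sketch, not code from the commit.

	disk, source := m.resolveNodeDisk(instanceName, nodeID, nodeName, node, nodeInfo)
	switch source {
	case "agent":
		// canonical disk truth from the linked Pulse host agent
	case "node-status-rootfs":
		// Proxmox rootfs bytes; can under-report ZFS-backed capacity
	case "nodes-endpoint":
		// aggregate /nodes values; last-resort fallback
	default:
		// empty source: no usable disk data, disk stays zero-valued
	}
	_ = disk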
@@ -4,6 +4,7 @@ import (
 	"context"
 	"testing"
 
+	"github.com/rcourtman/pulse-go-rewrite/internal/models"
 	"github.com/rcourtman/pulse-go-rewrite/pkg/proxmox"
 )
 
@@ -99,3 +100,89 @@ func TestResolveNodeMemoryCharacterization(t *testing.T) {
 		})
 	}
 }
+
+func TestPollPVENodePrefersLinkedHostDiskOverRootFS(t *testing.T) {
+	mon := newTestPVEMonitor("test")
+	defer mon.alertManager.Stop()
+	defer mon.notificationMgr.Stop()
+
+	mon.state.UpsertHost(models.Host{
+		ID:       "host-1",
+		Hostname: "ebringa",
+		Status:   "online",
+		Disks: []models.Disk{
+			{Mountpoint: "/", Type: "zfs", Total: 975, Used: 410, Free: 565, Usage: 42.0512820513},
+		},
+	})
+	mon.state.UpdateNodesForInstance("test", []models.Node{
+		{
+			ID:            "test-ebringa",
+			Name:          "ebringa",
+			Instance:      "test",
+			LinkedAgentID: "host-1",
+		},
+	})
+
+	client := &stubPVEClient{
+		nodeStatus: &proxmox.NodeStatus{
+			RootFS: &proxmox.RootFS{
+				Total: 975,
+				Used:  3,
+				Free:  972,
+			},
+		},
+	}
+
+	modelNode, _, err := mon.pollPVENode(
+		context.Background(),
+		"test",
+		&mon.config.PVEInstances[0],
+		client,
+		proxmox.Node{
+			Node:    "ebringa",
+			Status:  "online",
+			MaxDisk: 975,
+			Disk:    3,
+		},
+		"healthy",
+		nil,
+		nil,
+	)
+	if err != nil {
+		t.Fatalf("pollPVENode() error = %v", err)
+	}
+
+	if modelNode.Disk.Total != 975 || modelNode.Disk.Used != 410 || modelNode.Disk.Free != 565 {
+		t.Fatalf("node disk = %+v, want linked host summary", modelNode.Disk)
+	}
+	if modelNode.Disk.Usage < 42 || modelNode.Disk.Usage > 42.1 {
+		t.Fatalf("node disk usage = %.2f, want linked host usage around 42.05", modelNode.Disk.Usage)
+	}
+}
+
+func TestResolveNodeDiskFallsBackToRootFSWithoutLinkedHost(t *testing.T) {
+	mon := newTestPVEMonitor("test")
+	defer mon.alertManager.Stop()
+	defer mon.notificationMgr.Stop()
+
+	disk, source := mon.resolveNodeDisk(
+		"test",
+		"test-node1",
+		"node1",
+		proxmox.Node{Node: "node1", Disk: 10, MaxDisk: 100},
+		&proxmox.NodeStatus{
+			RootFS: &proxmox.RootFS{
+				Total: 200,
+				Used:  50,
+				Free:  150,
+			},
+		},
+	)
+
+	if source != "node-status-rootfs" {
+		t.Fatalf("source = %q, want node-status-rootfs", source)
+	}
+	if disk.Total != 200 || disk.Used != 50 || disk.Free != 150 {
+		t.Fatalf("disk = %+v, want rootfs values", disk)
+	}
+}