Pulse/internal/monitoring/host_agent_temps_test.go

594 lines
16 KiB
Go

package monitoring
import (
"testing"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/models"
unifiedresources "github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
"github.com/stretchr/testify/assert"
)
// TestConvertHostSensorsToTemperature_Empty verifies that a zero-value sensor
// summary (no readings at all) converts to a nil temperature.
func TestConvertHostSensorsToTemperature_Empty(t *testing.T) {
	var empty models.HostSensorSummary
	if got := convertHostSensorsToTemperature(empty, time.Now()); got != nil {
		t.Error("expected nil for empty sensors")
	}
}
// TestConvertHostSensorsToTemperature_CPUOnly converts a summary holding one
// cpu_package reading and two cpu_core_N readings. It expects the result to be
// available, to flag CPU data, to carry the package temperature, and to list
// both cores in ascending core order.
func TestConvertHostSensorsToTemperature_CPUOnly(t *testing.T) {
	sensors := models.HostSensorSummary{
		TemperatureCelsius: map[string]float64{
			"cpu_package": 55.0,
			"cpu_core_0":  50.0,
			"cpu_core_1":  52.0,
		},
	}
	now := time.Now()
	result := convertHostSensorsToTemperature(sensors, now)
	if result == nil {
		t.Fatal("expected non-nil result")
	}
	if !result.Available {
		t.Error("expected Available to be true")
	}
	if !result.HasCPU {
		t.Error("expected HasCPU to be true")
	}
	if result.CPUPackage != 55.0 {
		t.Errorf("expected CPUPackage 55.0, got %f", result.CPUPackage)
	}
	// Abort on a count mismatch: the ordering check below indexes Cores[0]
	// and Cores[1] and would panic on a shorter slice.
	if len(result.Cores) != 2 {
		t.Fatalf("expected 2 cores, got %d", len(result.Cores))
	}
	// Cores should be sorted
	if result.Cores[0].Core != 0 || result.Cores[1].Core != 1 {
		t.Error("cores not sorted correctly")
	}
	if result.CPUMax != 52.0 { // Max of core temps
		t.Errorf("expected CPUMax 52.0, got %f", result.CPUMax)
	}
}
// TestConvertHostSensorsToTemperature_NVMe converts a summary containing two
// nvmeN readings and expects them to appear as NVMe entries ordered by device
// name.
func TestConvertHostSensorsToTemperature_NVMe(t *testing.T) {
	sensors := models.HostSensorSummary{
		TemperatureCelsius: map[string]float64{
			"cpu_package": 45.0,
			"nvme0":       40.0,
			"nvme1":       42.0,
		},
	}
	result := convertHostSensorsToTemperature(sensors, time.Now())
	if result == nil {
		t.Fatal("expected non-nil result")
	}
	if !result.HasNVMe {
		t.Error("expected HasNVMe to be true")
	}
	// Abort on a count mismatch: the ordering check below indexes NVMe[0]
	// and NVMe[1] and would panic on a shorter slice.
	if len(result.NVMe) != 2 {
		t.Fatalf("expected 2 NVMe devices, got %d", len(result.NVMe))
	}
	// NVMe should be sorted
	if result.NVMe[0].Device != "nvme0" || result.NVMe[1].Device != "nvme1" {
		t.Error("NVMe devices not sorted correctly")
	}
}
// TestConvertHostSensorsToTemperature_GPU converts a summary with gpu_edge,
// gpu_junction and gpu_mem readings and expects a single GPU entry named
// "gpu0" whose Edge and Junction fields carry the matching readings. The
// gpu_mem reading is supplied but not asserted on.
func TestConvertHostSensorsToTemperature_GPU(t *testing.T) {
	sensors := models.HostSensorSummary{
		TemperatureCelsius: map[string]float64{
			"cpu_package":  45.0,
			"gpu_edge":     60.0,
			"gpu_junction": 65.0,
			"gpu_mem":      55.0,
		},
	}
	result := convertHostSensorsToTemperature(sensors, time.Now())
	if result == nil {
		t.Fatal("expected non-nil result")
	}
	if !result.HasGPU {
		t.Error("expected HasGPU to be true")
	}
	// Abort on a count mismatch: the checks below index GPU[0] and would
	// panic on an empty slice.
	if len(result.GPU) != 1 {
		t.Fatalf("expected 1 GPU, got %d", len(result.GPU))
	}
	if result.GPU[0].Device != "gpu0" {
		t.Errorf("expected device 'gpu0', got %q", result.GPU[0].Device)
	}
	if result.GPU[0].Edge != 60.0 {
		t.Errorf("expected Edge 60.0, got %f", result.GPU[0].Edge)
	}
	if result.GPU[0].Junction != 65.0 {
		t.Errorf("expected Junction 65.0, got %f", result.GPU[0].Junction)
	}
}
// TestConvertHostSensorsToTemperature_GenericGPU converts a summary with a
// single "gpu_nvidia" reading and expects one GPU entry whose device is
// "nvidia" and whose Edge field holds the reading.
func TestConvertHostSensorsToTemperature_GenericGPU(t *testing.T) {
	sensors := models.HostSensorSummary{
		TemperatureCelsius: map[string]float64{
			"cpu_package": 45.0,
			"gpu_nvidia":  70.0,
		},
	}
	result := convertHostSensorsToTemperature(sensors, time.Now())
	if result == nil {
		t.Fatal("expected non-nil result")
	}
	if !result.HasGPU {
		t.Error("expected HasGPU to be true")
	}
	// Abort on a count mismatch: the checks below index GPU[0] and would
	// panic on an empty slice.
	if len(result.GPU) != 1 {
		t.Fatalf("expected 1 GPU, got %d", len(result.GPU))
	}
	if result.GPU[0].Device != "nvidia" {
		t.Errorf("expected device 'nvidia', got %q", result.GPU[0].Device)
	}
	if result.GPU[0].Edge != 70.0 {
		t.Errorf("expected Edge 70.0, got %f", result.GPU[0].Edge)
	}
}
// TestConvertHostSensorsToTemperature_NoPackageUsesMaxCore supplies only
// per-core readings and expects CPUPackage to report the hottest core.
func TestConvertHostSensorsToTemperature_NoPackageUsesMaxCore(t *testing.T) {
	coreOnly := map[string]float64{
		"cpu_core_0": 50.0,
		"cpu_core_1": 55.0,
	}
	summary := models.HostSensorSummary{TemperatureCelsius: coreOnly}
	got := convertHostSensorsToTemperature(summary, time.Now())
	if got == nil {
		t.Fatal("expected non-nil result")
	}
	// No cpu_package key was provided, so the maximum core reading is
	// expected to stand in for the package temperature.
	if pkg := got.CPUPackage; pkg != 55.0 {
		t.Errorf("expected CPUPackage to be max core temp 55.0, got %f", pkg)
	}
}
// TestShouldSkipTemperatureSSHCollection covers three host-agent inputs: a nil
// temperature and a CPU-only temperature must not skip SSH collection, while a
// temperature carrying SMART disk data must skip it.
func TestShouldSkipTemperatureSSHCollection(t *testing.T) {
	t.Run("nil host agent temp does not skip", func(t *testing.T) {
		skip := shouldSkipTemperatureSSHCollection(nil)
		if skip {
			t.Fatal("expected nil host agent temp not to skip SSH collection")
		}
	})
	t.Run("cpu only host agent temp does not skip", func(t *testing.T) {
		cpuOnly := &models.Temperature{Available: true, HasCPU: true, CPUPackage: 55}
		if skip := shouldSkipTemperatureSSHCollection(cpuOnly); skip {
			t.Fatal("expected CPU-only host agent temp not to skip SSH collection")
		}
	})
	t.Run("host agent smart data skips", func(t *testing.T) {
		disks := []models.DiskTemp{{Device: "/dev/sda", Temperature: 35}}
		withSMART := &models.Temperature{Available: true, HasCPU: true, HasSMART: true, SMART: disks}
		if skip := shouldSkipTemperatureSSHCollection(withSMART); !skip {
			t.Fatal("expected SMART-capable host agent temp to skip SSH collection")
		}
	})
}
// TestIsHostAgentTemperatureRecent is a table-driven check of the recency
// predicate: timestamps from now and one minute ago count as recent, while
// timestamps three minutes and one hour old count as stale.
func TestIsHostAgentTemperatureRecent(t *testing.T) {
	// ago returns the instant that lies d before the current time.
	ago := func(d time.Duration) time.Time { return time.Now().Add(-d) }

	cases := []struct {
		name     string
		lastSeen time.Time
		expected bool
	}{
		{name: "recent - just now", lastSeen: time.Now(), expected: true},
		{name: "recent - 1 minute ago", lastSeen: ago(time.Minute), expected: true},
		{name: "stale - 3 minutes ago", lastSeen: ago(3 * time.Minute), expected: false},
		{name: "stale - 1 hour ago", lastSeen: ago(time.Hour), expected: false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := isHostAgentTemperatureRecent(tc.lastSeen); got != tc.expected {
				t.Errorf("isHostAgentTemperatureRecent() = %v, want %v", got, tc.expected)
			}
		})
	}
}
// TestMergeTemperatureData_NilInputs pins the nil handling of
// mergeTemperatureData: two nil inputs give nil, and when exactly one input is
// nil the other pointer is returned as-is.
func TestMergeTemperatureData_NilInputs(t *testing.T) {
	t.Run("both nil", func(t *testing.T) {
		if got := mergeTemperatureData(nil, nil); got != nil {
			t.Error("expected nil for both nil inputs")
		}
	})
	t.Run("host nil", func(t *testing.T) {
		proxyOnly := &models.Temperature{CPUPackage: 50.0}
		// Pointer identity is compared, not field equality.
		if got := mergeTemperatureData(nil, proxyOnly); got != proxyOnly {
			t.Error("expected proxy when host is nil")
		}
	})
	t.Run("proxy nil", func(t *testing.T) {
		hostOnly := &models.Temperature{CPUPackage: 55.0}
		// Pointer identity is compared, not field equality.
		if got := mergeTemperatureData(hostOnly, nil); got != hostOnly {
			t.Error("expected host when proxy is nil")
		}
	})
}
// TestMergeTemperatureData_Merge merges a host reading that has CPU data with
// a proxy reading that has CPU history and SMART data. It expects the host's
// CPU package value to win while the proxy's CPUMin and SMART entries are
// carried over.
func TestMergeTemperatureData_Merge(t *testing.T) {
	fromHost := &models.Temperature{
		CPUPackage: 55.0,
		CPUMax:     55.0,
		HasCPU:     true,
		Cores: []models.CoreTemp{
			{Core: 0, Temp: 50.0},
			{Core: 1, Temp: 55.0},
		},
		LastUpdate: time.Now(),
	}
	fromProxy := &models.Temperature{
		CPUPackage:   52.0,
		CPUMin:       30.0,
		CPUMaxRecord: 60.0,
		HasCPU:       true,
		HasSMART:     true,
		SMART: []models.DiskTemp{
			{Device: "/dev/sda", Temperature: 35},
		},
	}

	merged := mergeTemperatureData(fromHost, fromProxy)
	if merged == nil {
		t.Fatal("expected non-nil result")
	}
	// The host agent's CPU reading outranks the proxy's.
	if got := merged.CPUPackage; got != 55.0 {
		t.Errorf("expected CPUPackage 55.0 from host, got %f", got)
	}
	// Historical minimum comes from the proxy side.
	if got := merged.CPUMin; got != 30.0 {
		t.Errorf("expected CPUMin 30.0 from proxy, got %f", got)
	}
	// SMART data only exists on the proxy side and must survive the merge.
	if !merged.HasSMART {
		t.Error("expected HasSMART to be true")
	}
	if n := len(merged.SMART); n != 1 {
		t.Errorf("expected 1 SMART disk, got %d", n)
	}
}
// TestMergeTemperatureData_FallbackToProxy merges a host reading that has GPU
// data but no CPU data with a proxy reading that has CPU data. It expects the
// CPU fields to come from the proxy and the GPU entry to come from the host.
func TestMergeTemperatureData_FallbackToProxy(t *testing.T) {
	// The host side deliberately leaves HasCPU unset.
	gpuOnlyHost := &models.Temperature{
		HasGPU: true,
		GPU: []models.GPUTemp{
			{Device: "gpu0", Edge: 70.0},
		},
		LastUpdate: time.Now(),
	}
	cpuProxy := &models.Temperature{
		CPUPackage: 52.0,
		CPUMax:     52.0,
		HasCPU:     true,
		Cores: []models.CoreTemp{
			{Core: 0, Temp: 48.0},
		},
	}

	merged := mergeTemperatureData(gpuOnlyHost, cpuProxy)
	if merged == nil {
		t.Fatal("expected non-nil result")
	}
	// CPU values are expected to be taken from the proxy.
	if got := merged.CPUPackage; got != 52.0 {
		t.Errorf("expected CPUPackage 52.0 from proxy fallback, got %f", got)
	}
	if n := len(merged.Cores); n != 1 {
		t.Errorf("expected 1 core from proxy, got %d", n)
	}
	// GPU values are expected to be taken from the host.
	if !merged.HasGPU {
		t.Error("expected HasGPU to be true")
	}
	if n := len(merged.GPU); n != 1 {
		t.Errorf("expected 1 GPU from host, got %d", n)
	}
}
// TestGetHostAgentTemperature exercises host-agent temperature lookups against
// Monitor state. The subtests share one Monitor and run in declaration order,
// so hosts upserted by an earlier subtest remain in state for later ones.
func TestGetHostAgentTemperature(t *testing.T) {
	m := &Monitor{state: models.NewState()}
	t.Run("no hosts in state", func(t *testing.T) {
		result := m.getHostAgentTemperature("node1")
		assert.Nil(t, result)
	})
	t.Run("match by linked node id", func(t *testing.T) {
		host := models.Host{
			ID:           "host1",
			LinkedNodeID: "node-123",
			Sensors: models.HostSensorSummary{
				TemperatureCelsius: map[string]float64{"cpu_package": 60.0},
			},
		}
		m.state.UpsertHost(host)
		result := m.getHostAgentTemperatureByID("node-123", "different-name")
		// assert.NotNil does not stop the test; guard the dereference so a
		// failed lookup is reported as a failure rather than a panic.
		if assert.NotNil(t, result) {
			assert.Equal(t, 60.0, result.CPUPackage)
		}
	})
	t.Run("match by hostname fallback", func(t *testing.T) {
		host := models.Host{
			ID:       "host2",
			Hostname: "node2",
			Sensors: models.HostSensorSummary{
				TemperatureCelsius: map[string]float64{"cpu_package": 65.0},
			},
		}
		m.state.UpsertHost(host)
		result := m.getHostAgentTemperature("node2")
		if assert.NotNil(t, result) {
			assert.Equal(t, 65.0, result.CPUPackage)
		}
	})
	t.Run("no matching host", func(t *testing.T) {
		result := m.getHostAgentTemperature("node-missing")
		assert.Nil(t, result)
	})
	t.Run("matching host but no sensor data", func(t *testing.T) {
		host := models.Host{
			ID:       "host3",
			Hostname: "node3",
		}
		m.state.UpsertHost(host)
		result := m.getHostAgentTemperature("node3")
		assert.Nil(t, result)
	})
}
// TestConvertHostSensorsToTemperature_ExtraBranches covers two additional
// conversion cases: SMART disks flagged as standby are expected to be left out
// of the result, and gpu_edge/gpu_junction readings are expected to merge into
// a single "gpu0" entry.
func TestConvertHostSensorsToTemperature_ExtraBranches(t *testing.T) {
	t.Run("SMART disk standby", func(t *testing.T) {
		sensors := models.HostSensorSummary{
			TemperatureCelsius: map[string]float64{"cpu_package": 45.0},
			SMART: []models.HostDiskSMART{
				{Device: "sda", Temperature: 35, Standby: false},
				{Device: "sdb", Temperature: 0, Standby: true},
			},
		}
		result := convertHostSensorsToTemperature(sensors, time.Now())
		// The assert helpers do not abort the test, so chain them: a nil
		// result or a wrong length fails cleanly instead of panicking on the
		// dereference or the index below.
		if assert.NotNil(t, result) && assert.Len(t, result.SMART, 1) {
			assert.Equal(t, "/dev/sda", result.SMART[0].Device)
		}
	})
	t.Run("GPU merge into same device", func(t *testing.T) {
		sensors := models.HostSensorSummary{
			TemperatureCelsius: map[string]float64{
				"gpu_edge":     60.0,
				"gpu_junction": 65.0,
			},
		}
		result := convertHostSensorsToTemperature(sensors, time.Now())
		if assert.NotNil(t, result) && assert.Len(t, result.GPU, 1) {
			assert.Equal(t, "gpu0", result.GPU[0].Device)
			assert.Equal(t, 60.0, result.GPU[0].Edge)
			assert.Equal(t, 65.0, result.GPU[0].Junction)
		}
	})
}
// TestGetClusterSensorTemperature exercises lookups in the Monitor's cluster
// sensor cache: missing and empty names yield nil, a fresh entry is converted,
// an entry updated five minutes ago is treated as stale, and the node name is
// matched case-insensitively. The subtests share one Monitor and its cache.
func TestGetClusterSensorTemperature(t *testing.T) {
	m := &Monitor{
		state:               models.NewState(),
		clusterSensorsCache: make(map[string]clusterSensorsCacheEntry),
	}
	t.Run("empty cache returns nil", func(t *testing.T) {
		result := m.getClusterSensorTemperature("node1")
		assert.Nil(t, result)
	})
	t.Run("cached data returned", func(t *testing.T) {
		m.clusterSensorsCache["node2"] = clusterSensorsCacheEntry{
			sensors: models.HostSensorSummary{
				TemperatureCelsius: map[string]float64{
					"cpu_package": 58.0,
					"cpu_core_0":  55.0,
				},
			},
			updatedAt: time.Now(),
		}
		result := m.getClusterSensorTemperature("node2")
		// assert.NotNil does not stop the test; guard the dereferences so a
		// cache miss is reported as a failure rather than a panic.
		if assert.NotNil(t, result) {
			assert.Equal(t, 58.0, result.CPUPackage)
			assert.True(t, result.HasCPU)
		}
	})
	t.Run("stale data returns nil", func(t *testing.T) {
		m.clusterSensorsCache["stale-node"] = clusterSensorsCacheEntry{
			sensors: models.HostSensorSummary{
				TemperatureCelsius: map[string]float64{"cpu_package": 50.0},
			},
			updatedAt: time.Now().Add(-5 * time.Minute), // older than 2min threshold
		}
		result := m.getClusterSensorTemperature("stale-node")
		assert.Nil(t, result)
	})
	t.Run("case insensitive lookup", func(t *testing.T) {
		m.clusterSensorsCache["mynode"] = clusterSensorsCacheEntry{
			sensors: models.HostSensorSummary{
				TemperatureCelsius: map[string]float64{"cpu_package": 60.0},
			},
			updatedAt: time.Now(),
		}
		result := m.getClusterSensorTemperature("MyNode")
		if assert.NotNil(t, result) {
			assert.Equal(t, 60.0, result.CPUPackage)
		}
	})
	t.Run("empty node name returns nil", func(t *testing.T) {
		result := m.getClusterSensorTemperature("")
		assert.Nil(t, result)
	})
}
// TestGetHostAgentTemperatureByID_ClusterFallback verifies that when no host
// agent is registered in state, a lookup by node name is served from the
// cluster sensor cache.
func TestGetHostAgentTemperatureByID_ClusterFallback(t *testing.T) {
	m := &Monitor{
		state:               models.NewState(),
		clusterSensorsCache: make(map[string]clusterSensorsCacheEntry),
	}
	// No host agent, but cluster cache has data
	m.clusterSensorsCache["orphan-node"] = clusterSensorsCacheEntry{
		sensors: models.HostSensorSummary{
			TemperatureCelsius: map[string]float64{
				"cpu_package": 62.0,
			},
		},
		updatedAt: time.Now(),
	}
	result := m.getHostAgentTemperatureByID("", "orphan-node")
	// assert.NotNil does not stop the test; guard the dereference so a
	// missing fallback is reported as a failure rather than a panic.
	if assert.NotNil(t, result, "should fall back to cluster sensor cache") {
		assert.Equal(t, 62.0, result.CPUPackage)
	}
}
// TestGetHostAgentTemperatureByID_LocalAgentTakesPriority registers the same
// node name both as a host in state and as a cluster cache entry, with
// different CPU package readings, and expects the host's reading to win.
func TestGetHostAgentTemperatureByID_LocalAgentTakesPriority(t *testing.T) {
	m := &Monitor{
		state:               models.NewState(),
		clusterSensorsCache: make(map[string]clusterSensorsCacheEntry),
	}
	// Both local agent and cluster cache have data for the same node
	m.state.UpsertHost(models.Host{
		ID:       "host-local",
		Hostname: "shared-node",
		Sensors: models.HostSensorSummary{
			TemperatureCelsius: map[string]float64{
				"cpu_package": 70.0, // local agent reports 70
			},
		},
	})
	m.clusterSensorsCache["shared-node"] = clusterSensorsCacheEntry{
		sensors: models.HostSensorSummary{
			TemperatureCelsius: map[string]float64{
				"cpu_package": 55.0, // cluster cache says 55
			},
		},
		updatedAt: time.Now(),
	}
	result := m.getHostAgentTemperatureByID("", "shared-node")
	// assert.NotNil does not stop the test; guard the dereference so a nil
	// result is reported as a failure rather than a panic.
	if assert.NotNil(t, result) {
		assert.Equal(t, 70.0, result.CPUPackage, "local agent data should take priority over cluster cache")
	}
}
// TestGetHostAgentTemperatureByID_UsesUnifiedReadState ingests a host snapshot
// into a unified resources registry, wires it into the Monitor as its
// resourceStore while leaving the Monitor's own state empty, and expects the
// lookup by linked node ID to return that host's data. Of the two SMART disks,
// only the one not flagged as standby is expected in the result.
func TestGetHostAgentTemperatureByID_UsesUnifiedReadState(t *testing.T) {
	now := time.Now().UTC()
	registry := unifiedresources.NewRegistry(nil)
	registry.IngestSnapshot(models.StateSnapshot{
		Hosts: []models.Host{
			{
				ID:           "host-readstate",
				Hostname:     "readstate-node",
				LinkedNodeID: "node-readstate",
				LastSeen:     now,
				Sensors: models.HostSensorSummary{
					TemperatureCelsius: map[string]float64{
						"cpu_package": 71.0,
					},
					SMART: []models.HostDiskSMART{
						{Device: "sda", Temperature: 35, Standby: true},
						{Device: "sdb", Temperature: 37},
					},
				},
			},
		},
	})
	m := &Monitor{
		state:         models.NewState(),
		resourceStore: unifiedresources.NewMonitorAdapter(registry),
	}
	result := m.getHostAgentTemperatureByID("node-readstate", "ignored")
	// assert.NotNil does not stop the test; bail out explicitly so the field
	// accesses below cannot panic on a nil result.
	if !assert.NotNil(t, result) {
		return
	}
	assert.Equal(t, 71.0, result.CPUPackage)
	if assert.Len(t, result.SMART, 1) {
		assert.Equal(t, "/dev/sdb", result.SMART[0].Device)
	}
}
// TestMergeTemperatureData_HistoricalOverrides covers two merge cases: a host
// CPU reading above the proxy's recorded maximum is expected to raise
// CPUMaxRecord to the host value, and GPU/NVMe data present only on the proxy
// is expected to be carried into the merged result.
func TestMergeTemperatureData_HistoricalOverrides(t *testing.T) {
	t.Run("historical max update", func(t *testing.T) {
		host := &models.Temperature{CPUPackage: 70.0, HasCPU: true, Available: true}
		proxy := &models.Temperature{CPUPackage: 50.0, CPUMaxRecord: 60.0, HasCPU: true}
		result := mergeTemperatureData(host, proxy)
		// Guard the dereference so a nil merge result fails the test
		// instead of panicking.
		if assert.NotNil(t, result) {
			assert.Equal(t, 70.0, result.CPUMaxRecord)
		}
	})
	t.Run("fallback to proxy GPU and NVMe", func(t *testing.T) {
		host := &models.Temperature{CPUPackage: 50.0, HasCPU: true}
		proxy := &models.Temperature{
			HasGPU:  true,
			GPU:     []models.GPUTemp{{Device: "gpu0", Edge: 55.0}},
			HasNVMe: true,
			NVMe:    []models.NVMeTemp{{Device: "nvme0", Temp: 40.0}},
		}
		result := mergeTemperatureData(host, proxy)
		if assert.NotNil(t, result) {
			assert.True(t, result.HasGPU)
			assert.True(t, result.HasNVMe)
			assert.Len(t, result.GPU, 1)
			assert.Len(t, result.NVMe, 1)
		}
	})
}