Pulse/internal/monitoring/monitor_metrics_chart_fallback_test.go

package monitoring

import (
	"testing"
	"time"

	"github.com/rcourtman/pulse-go-rewrite/internal/models"
	"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
	"github.com/rcourtman/pulse-go-rewrite/pkg/metrics"
)
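
// newChartFallbackTestMonitor builds a Monitor backed by a small in-memory
// history and a temporary on-disk metrics store with retention long enough
// that no test data is aged out during the run.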
func newChartFallbackTestMonitor(t *testing.T) *Monitor {
t.Helper()
cfg := metrics.DefaultConfig(t.TempDir())
cfg.RetentionRaw = 90 * 24 * time.Hour
cfg.RetentionMinute = 90 * 24 * time.Hour
cfg.RetentionHourly = 90 * 24 * time.Hour
cfg.RetentionDaily = 90 * 24 * time.Hour
cfg.WriteBufferSize = 64
store, err := metrics.NewStore(cfg)
if err != nil {
t.Fatalf("failed to create metrics store: %v", err)
}
t.Cleanup(func() {
_ = store.Close()
})
return &Monitor{
metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
metricsStore: store,
}
}
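
// writeRawMetricBatch synchronously persists raw-tier points for a single
// resource/metric pair, giving the chart queries under test store-backed data
// to fall back on.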
func writeRawMetricBatch(t *testing.T, store *metrics.Store, resourceType, resourceID, metricType string, points []MetricPoint) {
t.Helper()
records := make([]metrics.WriteMetric, len(points))
for i, point := range points {
records[i] = metrics.WriteMetric{
ResourceType: resourceType,
ResourceID: resourceID,
MetricType: metricType,
Value: point.Value,
Timestamp: point.Timestamp,
Tier: metrics.TierRaw,
}
}
store.WriteBatchSync(records)
}
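
// Only a few minutes of in-memory samples exist for the guest, so a one-hour
// chart request should fall back to the store, whose points span most of the
// requested window.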
func TestGetGuestMetricsForChart_ShortRangeFallsBackToStoreWhenInMemoryCoverageShallow(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := time.Hour
inMemoryKey := "agent:host-1"
monitor.metricsHistory.AddGuestMetric(inMemoryKey, "cpu", 15, now.Add(-4*time.Minute))
monitor.metricsHistory.AddGuestMetric(inMemoryKey, "cpu", 18, now.Add(-1*time.Minute))
writeRawMetricBatch(t, monitor.metricsStore, "agent", "host-1", "cpu", []MetricPoint{
{Timestamp: now.Add(-58 * time.Minute), Value: 41},
{Timestamp: now.Add(-30 * time.Minute), Value: 43},
{Timestamp: now.Add(-1 * time.Minute), Value: 46},
})
result := monitor.GetGuestMetricsForChart(inMemoryKey, "agent", "host-1", duration)
if got, wantMin := chartMapCoverageSpan(result), 45*time.Minute; got < wantMin {
t.Fatalf("expected store-backed coverage >= %s, got %s", wantMin, got)
}
}
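
// The in-memory history covers nearly the full hour, so the chart should be
// served from memory even though the store holds conflicting values.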
func TestGetNodeMetricsForChart_ShortRangeUsesInMemoryWhenCoverageSufficient(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := time.Hour
for _, offset := range []time.Duration{-59 * time.Minute, -30 * time.Minute, -1 * time.Minute} {
monitor.metricsHistory.AddNodeMetric("node-1", "cpu", 11, now.Add(offset))
}
writeRawMetricBatch(t, monitor.metricsStore, "node", "node-1", "cpu", []MetricPoint{
{Timestamp: now.Add(-59 * time.Minute), Value: 99},
{Timestamp: now.Add(-30 * time.Minute), Value: 99},
{Timestamp: now.Add(-1 * time.Minute), Value: 99},
})
result := monitor.GetNodeMetricsForChart("node-1", "cpu", duration)
if len(result) == 0 {
t.Fatal("expected non-empty node chart series")
}
if result[0].Value != 11 {
t.Fatalf("expected in-memory series to be preferred, got first value %.2f", result[0].Value)
}
}
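
// In-memory node samples cover only the last few minutes, so the one-hour
// chart should be backed by the wider store data instead.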
func TestGetNodeMetricsForChart_ShortRangeFallsBackToStoreWhenInMemoryCoverageShallow(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := time.Hour
monitor.metricsHistory.AddNodeMetric("node-1", "cpu", 10, now.Add(-5*time.Minute))
monitor.metricsHistory.AddNodeMetric("node-1", "cpu", 12, now.Add(-1*time.Minute))
writeRawMetricBatch(t, monitor.metricsStore, "node", "node-1", "cpu", []MetricPoint{
{Timestamp: now.Add(-57 * time.Minute), Value: 60},
{Timestamp: now.Add(-25 * time.Minute), Value: 62},
{Timestamp: now.Add(-1 * time.Minute), Value: 65},
})
result := monitor.GetNodeMetricsForChart("node-1", "cpu", duration)
if got, wantMin := chartSeriesCoverageSpan(result), 45*time.Minute; got < wantMin {
t.Fatalf("expected store-backed node coverage >= %s, got %s", wantMin, got)
}
}
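
// Same shallow in-memory coverage scenario, exercised through the storage
// chart path.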
func TestGetStorageMetricsForChart_ShortRangeFallsBackToStoreWhenInMemoryCoverageShallow(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := time.Hour
monitor.metricsHistory.AddStorageMetric("storage-1", "usage", 20, now.Add(-3*time.Minute))
monitor.metricsHistory.AddStorageMetric("storage-1", "usage", 21, now.Add(-1*time.Minute))
writeRawMetricBatch(t, monitor.metricsStore, "storage", "storage-1", "usage", []MetricPoint{
{Timestamp: now.Add(-56 * time.Minute), Value: 71},
{Timestamp: now.Add(-28 * time.Minute), Value: 73},
{Timestamp: now.Add(-1 * time.Minute), Value: 75},
})
result := monitor.GetStorageMetricsForChart("storage-1", duration)
if got, wantMin := chartMapCoverageSpan(result), 45*time.Minute; got < wantMin {
t.Fatalf("expected store-backed storage coverage >= %s, got %s", wantMin, got)
}
}
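
// Disk temperature charts should be keyed by the canonical disk metric ID from
// the unified resource registry, carry the disk's name, node, and instance,
// and pad the current temperature into a two-point sparkline series.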
func TestGetPhysicalDiskTemperatureCharts_UsesUnifiedReadStateDiskViews(t *testing.T) {
t.Parallel()
registry := unifiedresources.NewRegistry(nil)
registry.IngestSnapshot(models.StateSnapshot{
PhysicalDisks: []models.PhysicalDisk{
{
ID: "disk-1",
Node: "node-a",
Instance: "lab-a",
DevPath: "/dev/nvme0n1",
Model: "Samsung PM9A3",
Serial: "SERIAL-DISK-1",
Temperature: 42,
LastChecked: time.Now().UTC(),
},
},
})
monitor := &Monitor{
state: models.NewState(),
resourceStore: unifiedresources.NewMonitorAdapter(registry),
}
charts := monitor.GetPhysicalDiskTemperatureCharts(30 * time.Minute)
entry, ok := charts["SERIAL-DISK-1"]
if !ok {
t.Fatalf("expected chart entry for canonical disk metric ID, got %#v", charts)
}
if entry.Name != "Samsung PM9A3" {
t.Fatalf("chart name = %q, want Samsung PM9A3", entry.Name)
}
if entry.Node != "node-a" {
t.Fatalf("chart node = %q, want node-a", entry.Node)
}
if entry.Instance != "lab-a" {
t.Fatalf("chart instance = %q, want lab-a", entry.Instance)
}
if len(entry.Temperature) != 2 {
t.Fatalf("expected padded 2-point sparkline series, got %d", len(entry.Temperature))
}
for _, point := range entry.Temperature {
if point.Value != 42 {
t.Fatalf("expected canonical disk temperature 42 in padded series, got %.2f", point.Value)
}
}
}
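
// Nothing was recorded inside the requested hour, but older points sit a few
// hours back in the store; the gap-fill lookback should surface those instead
// of returning an empty series.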
func TestGetGuestMetricsForChart_UsesGapFillLookbackWhenRequestedRangeIsEmpty(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := time.Hour
writeRawMetricBatch(t, monitor.metricsStore, "agent", "host-1", "cpu", []MetricPoint{
{Timestamp: now.Add(-4 * time.Hour), Value: 37},
{Timestamp: now.Add(-3*time.Hour - 30*time.Minute), Value: 39},
{Timestamp: now.Add(-3 * time.Hour), Value: 42},
})
result := monitor.GetGuestMetricsForChart("agent:host-1", "agent", "host-1", duration)
cpu := result["cpu"]
if len(cpu) == 0 {
t.Fatal("expected gap-fill fallback to return historical cpu points")
}
if cpu[len(cpu)-1].Timestamp.Before(now.Add(-4*time.Hour)) || cpu[len(cpu)-1].Timestamp.After(now.Add(-3*time.Hour+time.Minute)) {
t.Fatalf("expected latest fallback point to come from older store data window, got %s", cpu[len(cpu)-1].Timestamp)
}
}
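
// Node variant of the gap-fill lookback test: the newest store point is four
// hours old, yet the chart query should still return points from that older
// window.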
func TestGetNodeMetricsForChart_UsesGapFillLookbackWhenRequestedRangeIsEmpty(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := time.Hour
writeRawMetricBatch(t, monitor.metricsStore, "node", "node-1", "cpu", []MetricPoint{
{Timestamp: now.Add(-5 * time.Hour), Value: 21},
{Timestamp: now.Add(-4*time.Hour - 30*time.Minute), Value: 24},
{Timestamp: now.Add(-4 * time.Hour), Value: 26},
})
result := monitor.GetNodeMetricsForChart("node-1", "cpu", duration)
if len(result) == 0 {
t.Fatal("expected gap-fill fallback to return historical node cpu points")
}
if result[len(result)-1].Timestamp.Before(now.Add(-5*time.Hour)) || result[len(result)-1].Timestamp.After(now.Add(-4*time.Hour+time.Minute)) {
t.Fatalf("expected latest fallback node point to come from older store data window, got %s", result[len(result)-1].Timestamp)
}
}

// ---------- Batch method tests ----------
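
// A four-hour range is beyond the in-memory window, so the batch call must hit
// the store; every requested guest should come back with both cpu and memory
// series.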
func TestGetGuestMetricsForChartBatch_BatchesStoreQueries(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := 4 * time.Hour // beyond inMemoryChartThreshold → forces store path
// Write store data for 3 guests.
for _, id := range []string{"vm-1", "vm-2", "vm-3"} {
writeRawMetricBatch(t, monitor.metricsStore, "vm", id, "cpu", []MetricPoint{
{Timestamp: now.Add(-3 * time.Hour), Value: 10},
{Timestamp: now.Add(-2 * time.Hour), Value: 20},
{Timestamp: now.Add(-1 * time.Hour), Value: 30},
})
writeRawMetricBatch(t, monitor.metricsStore, "vm", id, "memory", []MetricPoint{
{Timestamp: now.Add(-3 * time.Hour), Value: 50},
{Timestamp: now.Add(-1 * time.Hour), Value: 60},
})
}
requests := []GuestChartRequest{
{InMemoryKey: "vm-1", SQLResourceID: "vm-1"},
{InMemoryKey: "vm-2", SQLResourceID: "vm-2"},
{InMemoryKey: "vm-3", SQLResourceID: "vm-3"},
}
result := monitor.GetGuestMetricsForChartBatch("vm", requests, duration)
// Verify all 3 guests have data.
for _, id := range []string{"vm-1", "vm-2", "vm-3"} {
		guestMetrics, ok := result[id]
		if !ok {
			t.Fatalf("missing result for %s", id)
		}
		if len(guestMetrics["cpu"]) == 0 {
			t.Errorf("%s: expected cpu points from store, got none", id)
		}
		if len(guestMetrics["memory"]) == 0 {
			t.Errorf("%s: expected memory points from store, got none", id)
		}
}
}
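
// With a full hour of in-memory coverage, the batch path should prefer the
// in-memory series (value 11) over the conflicting store values (99).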
func TestGetGuestMetricsForChartBatch_PrefersInMemoryForShortRanges(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := time.Hour // short range
// Seed sufficient in-memory coverage.
for _, offset := range []time.Duration{-55 * time.Minute, -30 * time.Minute, -5 * time.Minute} {
monitor.metricsHistory.AddGuestMetric("vm-1", "cpu", 11, now.Add(offset))
}
// Write different values to store to distinguish sources.
writeRawMetricBatch(t, monitor.metricsStore, "vm", "vm-1", "cpu", []MetricPoint{
{Timestamp: now.Add(-55 * time.Minute), Value: 99},
{Timestamp: now.Add(-30 * time.Minute), Value: 99},
{Timestamp: now.Add(-5 * time.Minute), Value: 99},
})
requests := []GuestChartRequest{{InMemoryKey: "vm-1", SQLResourceID: "vm-1"}}
result := monitor.GetGuestMetricsForChartBatch("vm", requests, duration)
cpuPoints := result["vm-1"]["cpu"]
if len(cpuPoints) == 0 {
t.Fatal("expected non-empty cpu series")
}
// In-memory values are 11; store values are 99. Should prefer in-memory.
if cpuPoints[0].Value != 11 {
t.Fatalf("expected in-memory value 11, got %.2f (store leak)", cpuPoints[0].Value)
}
}
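
// A nil request slice should return nil rather than an empty map.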
func TestGetGuestMetricsForChartBatch_EmptyRequests(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
result := monitor.GetGuestMetricsForChartBatch("vm", nil, time.Hour)
if result != nil {
t.Fatalf("expected nil for empty requests, got %v", result)
}
}
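
// The in-memory key and the store resource ID can differ (Docker containers
// use a "docker:" prefix in memory); the batch result is keyed by the SQL
// resource ID and should still surface the store-backed points.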
func TestGetGuestMetricsForChartBatch_DifferentInMemoryAndSQLKeys(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := 4 * time.Hour
// Docker containers use "docker:<id>" as in-memory key but just "<id>" as SQL ID.
monitor.metricsHistory.AddGuestMetric("docker:dc1", "cpu", 5, now.Add(-10*time.Minute))
writeRawMetricBatch(t, monitor.metricsStore, "dockerContainer", "dc1", "cpu", []MetricPoint{
{Timestamp: now.Add(-3 * time.Hour), Value: 40},
{Timestamp: now.Add(-2 * time.Hour), Value: 45},
{Timestamp: now.Add(-1 * time.Hour), Value: 50},
})
requests := []GuestChartRequest{{InMemoryKey: "docker:dc1", SQLResourceID: "dc1"}}
result := monitor.GetGuestMetricsForChartBatch("dockerContainer", requests, duration)
cpuPoints := result["dc1"]["cpu"]
if len(cpuPoints) == 0 {
t.Fatal("expected store-backed cpu points for docker container")
}
}
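
// Batch alias resolution: when history exists under both the canonical
// resource type and a legacy alias, the alias with the better coverage should
// win, as described in the in-test comment below.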
func TestGetGuestMetricsForChartBatch_PicksBestAliasCandidate(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := 4 * time.Hour
// Write sparse data under "dockerContainer" (canonical) and better data
// under the legacy "docker" alias. The batch path should pick the alias
// with better coverage, matching single-resource alias resolution.
writeRawMetricBatch(t, monitor.metricsStore, "dockerContainer", "dc1", "cpu", []MetricPoint{
{Timestamp: now.Add(-10 * time.Minute), Value: 1},
})
writeRawMetricBatch(t, monitor.metricsStore, "docker", "dc1", "cpu", []MetricPoint{
{Timestamp: now.Add(-3 * time.Hour), Value: 40},
{Timestamp: now.Add(-2 * time.Hour), Value: 45},
{Timestamp: now.Add(-1 * time.Hour), Value: 50},
})
requests := []GuestChartRequest{{InMemoryKey: "docker:dc1", SQLResourceID: "dc1"}}
result := monitor.GetGuestMetricsForChartBatch("dockerContainer", requests, duration)
cpuPoints := result["dc1"]["cpu"]
if len(cpuPoints) < 2 {
t.Fatalf("expected multi-point series from best alias, got %d points", len(cpuPoints))
}
span := chartSeriesCoverageSpan(cpuPoints)
if span < time.Hour {
t.Fatalf("expected coverage span >= 1h from legacy alias, got %s", span)
}
}
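
// Two nodes and three metric types are seeded in the store; the batch call
// should return a series for every node/metric combination that has data.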
func TestGetNodeMetricsForChartBatch_BatchesMultipleNodesAndMetricTypes(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := 4 * time.Hour
// Write store data for 2 nodes with multiple metric types.
for _, nid := range []string{"node-1", "node-2"} {
for _, mt := range []string{"cpu", "memory", "disk"} {
writeRawMetricBatch(t, monitor.metricsStore, "node", nid, mt, []MetricPoint{
{Timestamp: now.Add(-3 * time.Hour), Value: 10},
{Timestamp: now.Add(-2 * time.Hour), Value: 20},
{Timestamp: now.Add(-1 * time.Hour), Value: 30},
})
}
}
metricTypes := []string{"cpu", "memory", "disk", "netin", "netout"}
result := monitor.GetNodeMetricsForChartBatch([]string{"node-1", "node-2"}, metricTypes, duration)
for _, nid := range []string{"node-1", "node-2"} {
nodeMetrics, ok := result[nid]
if !ok {
t.Fatalf("missing result for %s", nid)
}
for _, mt := range []string{"cpu", "memory", "disk"} {
if len(nodeMetrics[mt]) == 0 {
t.Errorf("%s/%s: expected points from store, got none", nid, mt)
}
}
}
}
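
// A nil node ID slice should return nil.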
func TestGetNodeMetricsForChartBatch_EmptyNodeIDs(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
result := monitor.GetNodeMetricsForChartBatch(nil, []string{"cpu"}, time.Hour)
if result != nil {
t.Fatalf("expected nil for empty nodeIDs, got %v", result)
}
}
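
// Both storage pools have store-backed usage data, and the batch call should
// return a usage series for each of them.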
func TestGetStorageMetricsForChartBatch_BatchesMultiplePools(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
now := time.Now().UTC().Truncate(time.Second)
duration := 4 * time.Hour
for _, sid := range []string{"storage-1", "storage-2"} {
writeRawMetricBatch(t, monitor.metricsStore, "storage", sid, "usage", []MetricPoint{
{Timestamp: now.Add(-3 * time.Hour), Value: 60},
{Timestamp: now.Add(-2 * time.Hour), Value: 65},
{Timestamp: now.Add(-1 * time.Hour), Value: 70},
})
}
result := monitor.GetStorageMetricsForChartBatch([]string{"storage-1", "storage-2"}, duration)
for _, sid := range []string{"storage-1", "storage-2"} {
		poolMetrics, ok := result[sid]
		if !ok {
			t.Fatalf("missing result for %s", sid)
		}
		if len(poolMetrics["usage"]) == 0 {
			t.Errorf("%s: expected usage points from store, got none", sid)
		}
}
}
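
// A nil storage ID slice should return nil.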
func TestGetStorageMetricsForChartBatch_EmptyStorageIDs(t *testing.T) {
t.Parallel()
monitor := newChartFallbackTestMonitor(t)
result := monitor.GetStorageMetricsForChartBatch(nil, time.Hour)
if result != nil {
t.Fatalf("expected nil for empty storageIDs, got %v", result)
}
}