mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-07 08:57:12 +00:00
2352 lines
72 KiB
Go
2352 lines
72 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"strings"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/config"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/mock"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/models"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/storagehealth"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/truenas"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/vmware"
|
|
"github.com/rcourtman/pulse-go-rewrite/pkg/metrics"
|
|
"github.com/rcourtman/pulse-go-rewrite/pkg/pbs"
|
|
"github.com/rcourtman/pulse-go-rewrite/pkg/pmg"
|
|
)
|
|
|
|
// testPollProvider is a configurable PollProvider stub used to exercise the
// monitor's scheduling and polling plumbing without real PVE/PBS/PMG clients.
// Each field backs exactly one interface method.
type testPollProvider struct {
	providerType InstanceType // returned by Type()
	instances    []string     // returned (copied) by ListInstances
	// describeInstances is returned by DescribeInstances with Metadata cloned
	// per entry so callers cannot mutate the fixture.
	describeInstances []PollProviderInstanceInfo
	connectionStatus  map[string]bool // returned (copied) by ConnectionStatuses; empty map yields nil
	connectionKey     string          // trimmed and returned by ConnectionHealthKey; blank yields ""
	interval          time.Duration   // returned by BaseInterval
	// buildPollTask optionally overrides BuildPollTask; when nil a minimal
	// PollTask is synthesized from the instance name and provider type.
	buildPollTask func(instanceName string) (PollTask, error)
}
|
|
|
|
// Type reports the provider's configured instance type.
func (p testPollProvider) Type() InstanceType {
	return p.providerType
}
|
|
|
|
func (p testPollProvider) ListInstances(_ *Monitor) []string {
|
|
out := make([]string, len(p.instances))
|
|
copy(out, p.instances)
|
|
return out
|
|
}
|
|
|
|
func (p testPollProvider) DescribeInstances(_ *Monitor) []PollProviderInstanceInfo {
|
|
out := make([]PollProviderInstanceInfo, len(p.describeInstances))
|
|
for i := range p.describeInstances {
|
|
out[i] = PollProviderInstanceInfo{
|
|
Name: p.describeInstances[i].Name,
|
|
DisplayName: p.describeInstances[i].DisplayName,
|
|
Connection: p.describeInstances[i].Connection,
|
|
Metadata: cloneProviderMetadata(p.describeInstances[i].Metadata),
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (p testPollProvider) ConnectionStatuses(_ *Monitor) map[string]bool {
|
|
if len(p.connectionStatus) == 0 {
|
|
return nil
|
|
}
|
|
out := make(map[string]bool, len(p.connectionStatus))
|
|
for key, healthy := range p.connectionStatus {
|
|
out[key] = healthy
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (p testPollProvider) ConnectionHealthKey(_ *Monitor, instanceName string) string {
|
|
if strings.TrimSpace(p.connectionKey) != "" {
|
|
return strings.TrimSpace(p.connectionKey)
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// BaseInterval reports the provider's configured polling interval.
func (p testPollProvider) BaseInterval(_ *Monitor) time.Duration {
	return p.interval
}
|
|
|
|
func (p testPollProvider) BuildPollTask(_ *Monitor, instanceName string) (PollTask, error) {
|
|
if p.buildPollTask == nil {
|
|
return PollTask{
|
|
InstanceName: instanceName,
|
|
InstanceType: string(p.providerType),
|
|
}, nil
|
|
}
|
|
return p.buildPollTask(instanceName)
|
|
}
|
|
|
|
// testSupplementalPollProvider extends testPollProvider with the supplemental
// unified-resources hooks, serving canned records/changes keyed by org ID and
// recording the last org that was requested.
type testSupplementalPollProvider struct {
	testPollProvider
	source       unifiedresources.DataSource   // returned by SupplementalSource
	ownedSources []unifiedresources.DataSource // returned (copied) by SnapshotOwnedSources
	recordsByOrg map[string][]unifiedresources.IngestRecord
	changesByOrg map[string][]unifiedresources.ResourceChange
	// lastRequestedOrg captures the org ID of the most recent
	// SupplementalRecords/SupplementalChanges call, for assertions.
	lastRequestedOrg string
}
|
|
|
|
func (p *testSupplementalPollProvider) SupplementalSource() unifiedresources.DataSource {
|
|
return p.source
|
|
}
|
|
|
|
func (p *testSupplementalPollProvider) SupplementalRecords(_ *Monitor, orgID string) []unifiedresources.IngestRecord {
|
|
p.lastRequestedOrg = orgID
|
|
records := p.recordsByOrg[orgID]
|
|
out := make([]unifiedresources.IngestRecord, len(records))
|
|
copy(out, records)
|
|
return out
|
|
}
|
|
|
|
func (p *testSupplementalPollProvider) SupplementalChanges(_ *Monitor, orgID string) []unifiedresources.ResourceChange {
|
|
p.lastRequestedOrg = orgID
|
|
return cloneTestResourceChanges(p.changesByOrg[orgID])
|
|
}
|
|
|
|
func (p *testSupplementalPollProvider) SnapshotOwnedSources(_ *Monitor) []unifiedresources.DataSource {
|
|
out := make([]unifiedresources.DataSource, len(p.ownedSources))
|
|
copy(out, p.ownedSources)
|
|
return out
|
|
}
|
|
|
|
// testMonitorSupplementalProvider is a standalone stub of the monitor's
// supplemental-records provider interface (no poll-provider embedding),
// serving canned records/changes keyed by org ID.
type testMonitorSupplementalProvider struct {
	recordsByOrg map[string][]unifiedresources.IngestRecord
	changesByOrg map[string][]unifiedresources.ResourceChange
	ownedSources []unifiedresources.DataSource // returned (copied) by SnapshotOwnedSources
	// lastRequestedOrg captures the org ID of the most recent records/changes
	// call, for assertions.
	lastRequestedOrg string
}
|
|
|
|
// ptrFloat64 returns a pointer to a copy of value; handy for populating
// fixture structs whose fields are *float64.
func ptrFloat64(value float64) *float64 {
	v := value
	return &v
}
|
|
|
|
// ptrInt64 returns a pointer to a copy of value; handy for populating fixture
// structs whose fields are *int64.
func ptrInt64(value int64) *int64 {
	v := value
	return &v
}
|
|
|
|
func cloneTestResourceChanges(in []unifiedresources.ResourceChange) []unifiedresources.ResourceChange {
|
|
if in == nil {
|
|
return nil
|
|
}
|
|
out := make([]unifiedresources.ResourceChange, len(in))
|
|
for i := range in {
|
|
out[i] = in[i]
|
|
if in[i].OccurredAt != nil {
|
|
occurredAt := in[i].OccurredAt.UTC()
|
|
out[i].OccurredAt = &occurredAt
|
|
}
|
|
if in[i].RelatedResources != nil {
|
|
out[i].RelatedResources = append([]string(nil), in[i].RelatedResources...)
|
|
}
|
|
if in[i].Metadata != nil {
|
|
out[i].Metadata = make(map[string]any, len(in[i].Metadata))
|
|
for key, value := range in[i].Metadata {
|
|
out[i].Metadata[key] = value
|
|
}
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (p *testMonitorSupplementalProvider) SupplementalRecords(_ *Monitor, orgID string) []unifiedresources.IngestRecord {
|
|
p.lastRequestedOrg = orgID
|
|
records := p.recordsByOrg[orgID]
|
|
out := make([]unifiedresources.IngestRecord, len(records))
|
|
copy(out, records)
|
|
return out
|
|
}
|
|
|
|
func (p *testMonitorSupplementalProvider) SupplementalChanges(_ *Monitor, orgID string) []unifiedresources.ResourceChange {
|
|
p.lastRequestedOrg = orgID
|
|
return cloneTestResourceChanges(p.changesByOrg[orgID])
|
|
}
|
|
|
|
func (p *testMonitorSupplementalProvider) SnapshotOwnedSources() []unifiedresources.DataSource {
|
|
out := make([]unifiedresources.DataSource, len(p.ownedSources))
|
|
copy(out, p.ownedSources)
|
|
return out
|
|
}
|
|
|
|
// testSupplementalResourceStore is a recording stub of the monitor's resource
// store: it remembers snapshots, supplemental records, and recorded changes so
// tests can assert what the monitor pushed into it.
type testSupplementalResourceStore struct {
	snapshotCalls int                  // number of PopulateFromSnapshot calls
	lastSnapshot  models.StateSnapshot // snapshot passed to the most recent call
	// recordsBySource accumulates (clones of) records passed to
	// PopulateSupplementalRecords, keyed by source.
	recordsBySource map[unifiedresources.DataSource][]unifiedresources.IngestRecord
	recordedChanges []unifiedresources.ResourceChange // deep copies from RecordChange
}
|
|
|
|
// ShouldSkipAPIPolling always reports false: the stub never suppresses polling.
func (s *testSupplementalResourceStore) ShouldSkipAPIPolling(string) bool { return false }

// GetPollingRecommendations returns nil; the stub offers no recommendations.
func (s *testSupplementalResourceStore) GetPollingRecommendations() map[string]float64 { return nil }

// GetAll returns nil; the stub does not expose stored resources.
func (s *testSupplementalResourceStore) GetAll() []unifiedresources.Resource { return nil }
|
|
|
|
func (s *testSupplementalResourceStore) PopulateFromSnapshot(snapshot models.StateSnapshot) {
|
|
s.lastSnapshot = snapshot
|
|
s.snapshotCalls++
|
|
}
|
|
|
|
func (s *testSupplementalResourceStore) PopulateSupplementalRecords(source unifiedresources.DataSource, records []unifiedresources.IngestRecord) {
|
|
if s.recordsBySource == nil {
|
|
s.recordsBySource = make(map[unifiedresources.DataSource][]unifiedresources.IngestRecord)
|
|
}
|
|
cloned := make([]unifiedresources.IngestRecord, len(records))
|
|
copy(cloned, records)
|
|
s.recordsBySource[source] = append(s.recordsBySource[source], cloned...)
|
|
}
|
|
|
|
func (s *testSupplementalResourceStore) RecordChange(change unifiedresources.ResourceChange) error {
|
|
s.recordedChanges = append(s.recordedChanges, cloneTestResourceChanges([]unifiedresources.ResourceChange{change})...)
|
|
return nil
|
|
}
|
|
|
|
// testAtomicResourceStore is a recording stub for the atomic
// snapshot-plus-supplemental population path, counting both the plain and
// atomic populate calls separately.
type testAtomicResourceStore struct {
	snapshotCalls int // PopulateFromSnapshot calls
	atomicCalls   int // PopulateSnapshotAndSupplemental calls
	lastSnapshot  models.StateSnapshot
	// lastRecordsBySrc holds clones of the record map passed to the most
	// recent atomic populate call.
	lastRecordsBySrc map[unifiedresources.DataSource][]unifiedresources.IngestRecord
}
|
|
|
|
// ShouldSkipAPIPolling always reports false: the stub never suppresses polling.
func (s *testAtomicResourceStore) ShouldSkipAPIPolling(string) bool { return false }

// GetPollingRecommendations returns nil; the stub offers no recommendations.
func (s *testAtomicResourceStore) GetPollingRecommendations() map[string]float64 { return nil }

// GetAll returns nil; the stub does not expose stored resources.
func (s *testAtomicResourceStore) GetAll() []unifiedresources.Resource { return nil }
|
|
|
|
func (s *testAtomicResourceStore) PopulateFromSnapshot(snapshot models.StateSnapshot) {
|
|
s.lastSnapshot = snapshot
|
|
s.snapshotCalls++
|
|
}
|
|
|
|
func (s *testAtomicResourceStore) PopulateSnapshotAndSupplemental(snapshot models.StateSnapshot, recordsBySource map[unifiedresources.DataSource][]unifiedresources.IngestRecord) {
|
|
s.lastSnapshot = snapshot
|
|
s.atomicCalls++
|
|
s.lastRecordsBySrc = make(map[unifiedresources.DataSource][]unifiedresources.IngestRecord, len(recordsBySource))
|
|
for source, records := range recordsBySource {
|
|
cloned := make([]unifiedresources.IngestRecord, len(records))
|
|
copy(cloned, records)
|
|
s.lastRecordsBySrc[source] = cloned
|
|
}
|
|
}
|
|
|
|
// TestBuildScheduledTasksUsesConfiguredIntervals verifies that
// buildScheduledTasks emits one task per configured PVE/PBS/PMG client, each
// scheduled at the provided time and carrying that client type's configured
// polling interval.
func TestBuildScheduledTasksUsesConfiguredIntervals(t *testing.T) {
	now := time.Now()
	cfg := &config.Config{
		PVEPollingInterval:          2 * time.Minute,
		PBSPollingInterval:          45 * time.Second,
		PMGPollingInterval:          90 * time.Second,
		AdaptivePollingBaseInterval: 10 * time.Second,
	}

	// One client of each type; nil clients suffice since only scheduling is
	// exercised.
	monitor := &Monitor{
		config:     cfg,
		pveClients: map[string]PVEClientInterface{"pve-1": nil},
		pbsClients: map[string]*pbs.Client{"pbs-1": nil},
		pmgClients: map[string]*pmg.Client{"pmg-1": nil},
	}

	tasks := monitor.buildScheduledTasks(now)
	if len(tasks) != 3 {
		t.Fatalf("expected 3 tasks, got %d", len(tasks))
	}

	// Collect the interval per instance type while checking NextRun.
	got := map[InstanceType]time.Duration{}
	for _, task := range tasks {
		if !task.NextRun.Equal(now) {
			t.Fatalf("expected NextRun to equal provided time, got %v", task.NextRun)
		}
		got[task.InstanceType] = task.Interval
	}

	if got[InstanceTypePVE] != cfg.PVEPollingInterval {
		t.Fatalf("expected PVE interval %v, got %v", cfg.PVEPollingInterval, got[InstanceTypePVE])
	}
	if got[InstanceTypePBS] != cfg.PBSPollingInterval {
		t.Fatalf("expected PBS interval %v, got %v", cfg.PBSPollingInterval, got[InstanceTypePBS])
	}
	if got[InstanceTypePMG] != cfg.PMGPollingInterval {
		t.Fatalf("expected PMG interval %v, got %v", cfg.PMGPollingInterval, got[InstanceTypePMG])
	}
}
|
|
|
|
// TestRescheduleTaskUsesInstanceIntervalWhenSchedulerDisabled verifies that
// rescheduleTask falls back to the instance type's configured polling interval
// when the task itself carries a zero interval, and that NextRun lands roughly
// one interval in the future.
func TestRescheduleTaskUsesInstanceIntervalWhenSchedulerDisabled(t *testing.T) {
	cfg := &config.Config{
		PVEPollingInterval:          75 * time.Second,
		AdaptivePollingBaseInterval: 10 * time.Second,
	}

	monitor := &Monitor{
		config:    cfg,
		taskQueue: NewTaskQueue(),
	}

	// Interval deliberately zero so the fallback path is exercised.
	task := ScheduledTask{
		InstanceName: "pve-1",
		InstanceType: InstanceTypePVE,
		Interval:     0,
		NextRun:      time.Now(),
	}

	monitor.rescheduleTask(task)

	// Inspect the queue entry directly under its lock.
	monitor.taskQueue.mu.Lock()
	entry, ok := monitor.taskQueue.entries[schedulerKey(task.InstanceType, task.InstanceName)]
	monitor.taskQueue.mu.Unlock()
	if !ok {
		t.Fatalf("expected task to be rescheduled in queue")
	}

	if entry.task.Interval != cfg.PVEPollingInterval {
		t.Fatalf("expected interval %v, got %v", cfg.PVEPollingInterval, entry.task.Interval)
	}

	// Allow a small window around the interval to absorb test scheduling jitter.
	remaining := time.Until(entry.task.NextRun)
	if remaining < cfg.PVEPollingInterval-2*time.Second || remaining > cfg.PVEPollingInterval+time.Second {
		t.Fatalf("expected NextRun about %v from now, got %v", cfg.PVEPollingInterval, remaining)
	}
}
|
|
|
|
// TestUpdateResourceStoreSyncsUnifiedIncidentAlerts verifies that a
// supplemental TrueNAS storage record carrying an incident causes
// updateResourceStore to raise a matching alert ("zfs-pool-state" for a
// degraded ZFS pool) in the alert manager and to mirror it into the monitor's
// state snapshot.
func TestUpdateResourceStoreSyncsUnifiedIncidentAlerts(t *testing.T) {
	alertManager := alerts.NewManagerWithDataDir(t.TempDir())
	defer alertManager.Stop()

	store := unifiedresources.NewMonitorAdapter(nil)
	// Monitor wired with a supplemental provider that serves one ZFS pool
	// resource with a single warning-severity incident.
	monitor := &Monitor{
		state:         models.NewState(),
		resourceStore: store,
		alertManager:  alertManager,
		orgID:         "default",
		supplementalProviders: map[unifiedresources.DataSource]MonitorSupplementalRecordsProvider{
			unifiedresources.SourceTrueNAS: &testMonitorSupplementalProvider{
				recordsByOrg: map[string][]unifiedresources.IngestRecord{
					"default": {{
						SourceID: "pool:tank",
						Resource: unifiedresources.Resource{
							ID:         "storage:tank",
							Type:       unifiedresources.ResourceTypeStorage,
							Name:       "tank",
							ParentName: "truenas-main",
							Sources:    []unifiedresources.DataSource{unifiedresources.SourceTrueNAS},
							Storage: &unifiedresources.StorageMeta{
								Platform:   "truenas",
								Topology:   "pool",
								Protection: "zfs",
								IsZFS:      true,
							},
							Incidents: []unifiedresources.ResourceIncident{{
								Provider: "truenas",
								NativeID: "alert-1",
								Code:     "truenas_volume_status",
								Severity: storagehealth.RiskWarning,
								Summary:  "Pool tank is DEGRADED",
							}},
						},
					}},
				},
			},
		},
	}

	monitor.updateResourceStore(models.StateSnapshot{})

	// The incident should surface as exactly one zfs-pool-state alert.
	active := alertManager.GetActiveAlerts()
	if len(active) != 1 {
		t.Fatalf("expected 1 active alert, got %d", len(active))
	}
	if active[0].Type != "zfs-pool-state" {
		t.Fatalf("alert type = %q, want zfs-pool-state", active[0].Type)
	}

	// The alert must also be reflected in the monitor's state snapshot.
	snapshot := monitor.state.GetSnapshot()
	if len(snapshot.ActiveAlerts) != 1 {
		t.Fatalf("expected state snapshot to contain 1 active alert, got %d", len(snapshot.ActiveAlerts))
	}
}
|
|
|
|
// TestUpdateResourceStoreSyncsCanonicalStorageMetrics verifies that
// updateResourceStore records storage usage history under the canonical
// storage metrics ID, both in the in-memory history and in the persistent
// metrics store, for (a) supplemental TrueNAS pool records and (b) Unraid
// storage derived from an agent host snapshot.
func TestUpdateResourceStoreSyncsCanonicalStorageMetrics(t *testing.T) {
	t.Run("supplemental truenas storage", func(t *testing.T) {
		cfg := metrics.DefaultConfig(t.TempDir())
		store, err := metrics.NewStore(cfg)
		if err != nil {
			t.Fatalf("metrics.NewStore() error = %v", err)
		}
		defer func() { _ = store.Close() }()

		resourceStore := unifiedresources.NewMonitorAdapter(nil)
		used := int64(620)
		total := int64(1000)
		// Monitor wired with a supplemental TrueNAS pool carrying disk metrics.
		monitor := &Monitor{
			resourceStore:  resourceStore,
			metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
			metricsStore:   store,
			orgID:          "default",
			supplementalProviders: map[unifiedresources.DataSource]MonitorSupplementalRecordsProvider{
				unifiedresources.SourceTrueNAS: &testMonitorSupplementalProvider{
					recordsByOrg: map[string][]unifiedresources.IngestRecord{
						"default": {{
							SourceID: "pool:tank",
							Resource: unifiedresources.Resource{
								ID:        "storage:tank",
								Type:      unifiedresources.ResourceTypeStorage,
								Name:      "tank",
								Status:    unifiedresources.StatusOnline,
								LastSeen:  time.Now().UTC(),
								UpdatedAt: time.Now().UTC(),
								Sources:   []unifiedresources.DataSource{unifiedresources.SourceTrueNAS},
								Metrics: &unifiedresources.ResourceMetrics{
									Disk: &unifiedresources.MetricValue{Used: &used, Total: &total, Percent: 62},
								},
								Storage: &unifiedresources.StorageMeta{
									Platform:   "truenas",
									Topology:   "pool",
									Protection: "zfs",
								},
							},
						}},
					},
				},
			},
		}

		monitor.updateResourceStore(models.StateSnapshot{})

		// Usage history must land in the in-memory ring buffer...
		memory := monitor.GetStorageMetrics("pool:tank", time.Hour)
		if got := len(memory["usage"]); got == 0 {
			t.Fatalf("expected in-memory usage history for pool:tank")
		}

		// ...and in the persistent metrics store used for charts.
		storeBacked := monitor.GetStorageMetricsForChart("pool:tank", 7*24*time.Hour)
		if got := len(storeBacked["usage"]); got == 0 {
			t.Fatalf("expected persisted usage history for pool:tank")
		}
	})

	t.Run("agent unraid storage", func(t *testing.T) {
		cfg := metrics.DefaultConfig(t.TempDir())
		store, err := metrics.NewStore(cfg)
		if err != nil {
			t.Fatalf("metrics.NewStore() error = %v", err)
		}
		defer func() { _ = store.Close() }()

		resourceStore := unifiedresources.NewMonitorAdapter(nil)
		// Host snapshot with an Unraid array and one user-share disk.
		snapshot := models.StateSnapshot{
			Hosts: []models.Host{
				{
					ID:        "host-tower",
					Hostname:  "tower",
					Status:    "online",
					LastSeen:  time.Now().UTC(),
					MachineID: "machine-tower",
					Disks: []models.Disk{
						{Mountpoint: "/mnt/user", Total: 1000, Used: 400, Free: 600, Usage: 40},
					},
					Unraid: &models.HostUnraidStorage{
						ArrayStarted: true,
						ArrayState:   "STARTED",
						NumProtected: 1,
					},
				},
			},
		}
		monitor := &Monitor{
			resourceStore:  resourceStore,
			metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
			metricsStore:   store,
		}

		monitor.updateResourceStore(snapshot)

		// Locate the Unraid storage resource the store derived from the host.
		var storageResourceID string
		for _, resource := range resourceStore.GetAll() {
			if resource.Type == unifiedresources.ResourceTypeStorage && resource.Storage != nil && resource.Storage.Platform == "unraid" {
				storageResourceID = resource.ID
				break
			}
		}
		if storageResourceID == "" {
			t.Fatal("expected unraid storage resource in unified store")
		}

		// Its metrics target must use the canonical host-scoped storage ID.
		target := resourceStore.MetricsTargetForResource(storageResourceID)
		if target == nil || target.ResourceType != "storage" || target.ResourceID != "host-tower/storage:unraid-array" {
			t.Fatalf("unexpected unraid storage metrics target %+v", target)
		}

		memory := monitor.GetStorageMetrics(target.ResourceID, time.Hour)
		if got := len(memory["usage"]); got == 0 {
			t.Fatalf("expected in-memory usage history for %s", target.ResourceID)
		}

		storeBacked := monitor.GetStorageMetricsForChart(target.ResourceID, 7*24*time.Hour)
		if got := len(storeBacked["usage"]); got == 0 {
			t.Fatalf("expected persisted usage history for %s", target.ResourceID)
		}
	})
}
|
|
|
|
// TestBuildBroadcastFrontendStateIncludesUnifiedIncidentAlerts verifies that
// the frontend broadcast state includes an incident-derived alert (type
// "resource-incident") when a supplemental TrueNAS agent resource carries a
// critical incident.
func TestBuildBroadcastFrontendStateIncludesUnifiedIncidentAlerts(t *testing.T) {
	alertManager := alerts.NewManagerWithDataDir(t.TempDir())
	defer alertManager.Stop()

	store := unifiedresources.NewMonitorAdapter(nil)
	// Monitor wired with one TrueNAS agent resource whose incident is
	// critical (a faulted pool).
	monitor := &Monitor{
		state:         models.NewState(),
		resourceStore: store,
		alertManager:  alertManager,
		orgID:         "default",
		supplementalProviders: map[unifiedresources.DataSource]MonitorSupplementalRecordsProvider{
			unifiedresources.SourceTrueNAS: &testMonitorSupplementalProvider{
				recordsByOrg: map[string][]unifiedresources.IngestRecord{
					"default": {{
						SourceID: "system:truenas-main",
						Resource: unifiedresources.Resource{
							ID:      "agent:truenas-main",
							Type:    unifiedresources.ResourceTypeAgent,
							Name:    "truenas-main",
							Sources: []unifiedresources.DataSource{unifiedresources.SourceTrueNAS},
							TrueNAS: &unifiedresources.TrueNASData{Hostname: "truenas-main"},
							Incidents: []unifiedresources.ResourceIncident{{
								Provider: "truenas",
								NativeID: "alert-2",
								Code:     "truenas_volume_status",
								Severity: storagehealth.RiskCritical,
								Summary:  "Pool tank is FAULTED",
							}},
						},
					}},
				},
			},
		},
	}

	frontend := monitor.buildBroadcastFrontendStateFromSnapshot(models.StateSnapshot{})

	if len(frontend.ActiveAlerts) != 1 {
		t.Fatalf("expected 1 active alert in frontend state, got %d", len(frontend.ActiveAlerts))
	}
	if frontend.ActiveAlerts[0].Type != "resource-incident" {
		t.Fatalf("alert type = %q, want resource-incident", frontend.ActiveAlerts[0].Type)
	}
}
|
|
|
|
// TestSyncUnifiedAppContainerMetricsRecordsTrueNASHistory verifies that
// syncUnifiedAppContainerMetrics records cpu/network history for a TrueNAS
// app container (from the default fixtures) under its canonical
// "docker:<name>" metrics ID, in both memory and the persistent store.
func TestSyncUnifiedAppContainerMetricsRecordsTrueNASHistory(t *testing.T) {
	// Enable the TrueNAS feature flag for the duration of the test.
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() {
		truenas.SetFeatureEnabled(previous)
	})

	cfg := metrics.DefaultConfig(t.TempDir())
	store, err := metrics.NewStore(cfg)
	if err != nil {
		t.Fatalf("metrics.NewStore() error = %v", err)
	}
	defer func() { _ = store.Close() }()

	// Seed the unified store with the canned TrueNAS fixtures.
	resourceStore := unifiedresources.NewMonitorAdapter(nil)
	records := truenas.NewProvider(truenas.DefaultFixtures()).Records()
	resourceStore.PopulateSnapshotAndSupplemental(models.StateSnapshot{}, map[unifiedresources.DataSource][]unifiedresources.IngestRecord{
		unifiedresources.SourceTrueNAS: records,
	})

	monitor := &Monitor{
		resourceStore:  resourceStore,
		metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
		metricsStore:   store,
	}

	monitor.syncUnifiedAppContainerMetrics(resourceStore)

	// The fixtures include a "Nextcloud" app container.
	var appResourceID string
	for _, resource := range resourceStore.GetAll() {
		if resource.Type == unifiedresources.ResourceTypeAppContainer && resource.Name == "Nextcloud" {
			appResourceID = resource.ID
			break
		}
	}
	if appResourceID == "" {
		t.Fatal("expected Nextcloud app-container resource in unified store")
	}

	target := resourceStore.MetricsTargetForResource(appResourceID)
	if target == nil || target.ResourceType != "app-container" || target.ResourceID != "nextcloud" {
		t.Fatalf("unexpected app-container metrics target %+v", target)
	}

	inMemory := monitor.GetGuestMetrics("docker:nextcloud", time.Hour)
	if got := len(inMemory["cpu"]); got == 0 {
		t.Fatalf("expected in-memory cpu history for nextcloud")
	}
	if got := len(inMemory["netin"]); got == 0 {
		t.Fatalf("expected in-memory network history for nextcloud")
	}

	storeBacked := monitor.GetGuestMetricsForChart("docker:nextcloud", "dockerContainer", "nextcloud", 7*24*time.Hour)
	if got := len(storeBacked["cpu"]); got == 0 {
		t.Fatalf("expected persisted cpu history for nextcloud")
	}
}
|
|
|
|
// TestSyncUnifiedAppContainerMetricsSkipsMockOwnedTrueNASHistoryWhenMockEnabled
// verifies that when mock mode is on, syncUnifiedAppContainerMetrics does NOT
// record history for TrueNAS-owned app containers (mock data owns them).
func TestSyncUnifiedAppContainerMetricsSkipsMockOwnedTrueNASHistoryWhenMockEnabled(t *testing.T) {
	// Enable TrueNAS and mock mode, restoring both afterwards.
	previousFeature := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() {
		truenas.SetFeatureEnabled(previousFeature)
	})

	previousMock := mock.IsMockEnabled()
	mock.SetEnabled(true)
	t.Cleanup(func() {
		mock.SetEnabled(previousMock)
	})

	cfg := metrics.DefaultConfig(t.TempDir())
	store, err := metrics.NewStore(cfg)
	if err != nil {
		t.Fatalf("metrics.NewStore() error = %v", err)
	}
	defer func() { _ = store.Close() }()

	resourceStore := unifiedresources.NewMonitorAdapter(nil)
	resourceStore.PopulateSnapshotAndSupplemental(models.StateSnapshot{}, map[unifiedresources.DataSource][]unifiedresources.IngestRecord{
		unifiedresources.SourceTrueNAS: truenas.NewProvider(truenas.DefaultFixtures()).Records(),
	})

	monitor := &Monitor{
		resourceStore:  resourceStore,
		metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
		metricsStore:   store,
	}

	monitor.syncUnifiedAppContainerMetrics(resourceStore)

	// With mock enabled, no cpu points should have been recorded.
	if got := len(monitor.GetGuestMetrics("docker:nextcloud", time.Hour)["cpu"]); got != 0 {
		t.Fatalf("expected mock-owned TrueNAS app history to be skipped, got %d cpu points", got)
	}
}
|
|
|
|
// TestSyncUnifiedAgentMetricsRecordsTrueNASHostHistory verifies that
// syncUnifiedAgentMetrics records cpu/network history for the TrueNAS system
// resource under its canonical "agent:<host>" metrics ID, in both memory and
// the persistent store.
func TestSyncUnifiedAgentMetricsRecordsTrueNASHostHistory(t *testing.T) {
	// Enable the TrueNAS feature flag for the duration of the test.
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() {
		truenas.SetFeatureEnabled(previous)
	})

	cfg := metrics.DefaultConfig(t.TempDir())
	store, err := metrics.NewStore(cfg)
	if err != nil {
		t.Fatalf("metrics.NewStore() error = %v", err)
	}
	defer func() { _ = store.Close() }()

	// Seed the unified store with the canned TrueNAS fixtures.
	resourceStore := unifiedresources.NewMonitorAdapter(nil)
	records := truenas.NewProvider(truenas.DefaultFixtures()).Records()
	resourceStore.PopulateSnapshotAndSupplemental(models.StateSnapshot{}, map[unifiedresources.DataSource][]unifiedresources.IngestRecord{
		unifiedresources.SourceTrueNAS: records,
	})

	monitor := &Monitor{
		resourceStore:  resourceStore,
		metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
		metricsStore:   store,
	}

	monitor.syncUnifiedAgentMetrics(resourceStore)

	// The fixtures include the "truenas-main" system resource.
	var systemResourceID string
	for _, resource := range resourceStore.GetAll() {
		if resource.Type == unifiedresources.ResourceTypeAgent && resource.Name == "truenas-main" {
			systemResourceID = resource.ID
			break
		}
	}
	if systemResourceID == "" {
		t.Fatal("expected TrueNAS system resource in unified store")
	}

	target := resourceStore.MetricsTargetForResource(systemResourceID)
	if target == nil || target.ResourceType != "agent" || target.ResourceID != "truenas-main" {
		t.Fatalf("unexpected agent metrics target %+v", target)
	}

	inMemory := monitor.GetGuestMetrics("agent:truenas-main", time.Hour)
	if got := len(inMemory["cpu"]); got == 0 {
		t.Fatalf("expected in-memory cpu history for truenas-main")
	}
	if got := len(inMemory["netin"]); got == 0 {
		t.Fatalf("expected in-memory network history for truenas-main")
	}

	storeBacked := monitor.GetGuestMetricsForChart("agent:truenas-main", "agent", "truenas-main", 7*24*time.Hour)
	if got := len(storeBacked["cpu"]); got == 0 {
		t.Fatalf("expected persisted cpu history for truenas-main")
	}
}
|
|
|
|
// TestSyncUnifiedAgentMetricsSkipsMockOwnedProviderHistoryWhenMockEnabled
// verifies that when mock mode is on, syncUnifiedAgentMetrics does NOT record
// history for TrueNAS-owned host resources (mock data owns them).
func TestSyncUnifiedAgentMetricsSkipsMockOwnedProviderHistoryWhenMockEnabled(t *testing.T) {
	// Enable TrueNAS and mock mode, restoring both afterwards.
	previousTrueNAS := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() {
		truenas.SetFeatureEnabled(previousTrueNAS)
	})

	previousMock := mock.IsMockEnabled()
	mock.SetEnabled(true)
	t.Cleanup(func() {
		mock.SetEnabled(previousMock)
	})

	cfg := metrics.DefaultConfig(t.TempDir())
	store, err := metrics.NewStore(cfg)
	if err != nil {
		t.Fatalf("metrics.NewStore() error = %v", err)
	}
	defer func() { _ = store.Close() }()

	resourceStore := unifiedresources.NewMonitorAdapter(nil)
	resourceStore.PopulateSnapshotAndSupplemental(models.StateSnapshot{}, map[unifiedresources.DataSource][]unifiedresources.IngestRecord{
		unifiedresources.SourceTrueNAS: truenas.NewProvider(truenas.DefaultFixtures()).Records(),
	})

	monitor := &Monitor{
		resourceStore:  resourceStore,
		metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
		metricsStore:   store,
	}

	monitor.syncUnifiedAgentMetrics(resourceStore)

	// With mock enabled, no cpu points should have been recorded.
	if got := len(monitor.GetGuestMetrics("agent:truenas-main", time.Hour)["cpu"]); got != 0 {
		t.Fatalf("expected mock-owned TrueNAS host history to be skipped, got %d cpu points", got)
	}
}
|
|
|
|
// TestSyncUnifiedAgentMetricsRecordsVMwareHostHistory verifies that
// syncUnifiedAgentMetrics records cpu/network history for a VMware ESXi host
// under its canonical "<connection>:host:<moid>" metrics ID, in both memory
// and the persistent store.
func TestSyncUnifiedAgentMetricsRecordsVMwareHostHistory(t *testing.T) {
	// Enable the VMware feature flag for the duration of the test.
	previous := vmware.IsFeatureEnabled()
	vmware.SetFeatureEnabled(true)
	t.Cleanup(func() {
		vmware.SetFeatureEnabled(previous)
	})

	cfg := metrics.DefaultConfig(t.TempDir())
	store, err := metrics.NewStore(cfg)
	if err != nil {
		t.Fatalf("metrics.NewStore() error = %v", err)
	}
	defer func() { _ = store.Close() }()

	// Build one powered-on ESXi host with a full set of metric samples.
	resourceStore := unifiedresources.NewMonitorAdapter(nil)
	records := vmware.NewProvider(vmware.InventorySnapshot{
		ConnectionID:   "vc-1",
		ConnectionName: "Lab VC",
		VCenterHost:    "vc.lab.local",
		CollectedAt:    time.Date(2026, time.March, 30, 18, 15, 0, 0, time.UTC),
		Hosts: []vmware.InventoryHost{{
			Host:            "host-101",
			Name:            "esxi-01.lab.local",
			ConnectionState: "CONNECTED",
			PowerState:      "POWERED_ON",
			HostUUID:        "uuid-host-1",
			Metrics: &vmware.InventoryMetrics{
				CPUPercent:              ptrFloat64(21.4),
				MemoryPercent:           ptrFloat64(63.2),
				MemoryUsedBytes:         ptrInt64(27144105984),
				MemoryTotalBytes:        ptrInt64(42949672960),
				NetInBytesPerSecond:     ptrFloat64(1024),
				NetOutBytesPerSecond:    ptrFloat64(2048),
				DiskReadBytesPerSecond:  ptrFloat64(4096),
				DiskWriteBytesPerSecond: ptrFloat64(8192),
			},
		}},
	}).Records()
	resourceStore.PopulateSnapshotAndSupplemental(models.StateSnapshot{}, map[unifiedresources.DataSource][]unifiedresources.IngestRecord{
		unifiedresources.SourceVMware: records,
	})

	monitor := &Monitor{
		resourceStore:  resourceStore,
		metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
		metricsStore:   store,
	}

	monitor.syncUnifiedAgentMetrics(resourceStore)

	var systemResourceID string
	for _, resource := range resourceStore.GetAll() {
		if resource.Type == unifiedresources.ResourceTypeAgent && resource.Name == "esxi-01.lab.local" {
			systemResourceID = resource.ID
			break
		}
	}
	if systemResourceID == "" {
		t.Fatal("expected VMware host resource in unified store")
	}

	target := resourceStore.MetricsTargetForResource(systemResourceID)
	if target == nil || target.ResourceType != "agent" || target.ResourceID != "vc-1:host:host-101" {
		t.Fatalf("unexpected agent metrics target %+v", target)
	}

	inMemory := monitor.GetGuestMetrics("agent:vc-1:host:host-101", time.Hour)
	if got := len(inMemory["cpu"]); got == 0 {
		t.Fatalf("expected in-memory cpu history for VMware host")
	}
	if got := len(inMemory["netin"]); got == 0 {
		t.Fatalf("expected in-memory network history for VMware host")
	}

	storeBacked := monitor.GetGuestMetricsForChart("agent:vc-1:host:host-101", "agent", "vc-1:host:host-101", 7*24*time.Hour)
	if got := len(storeBacked["cpu"]); got == 0 {
		t.Fatalf("expected persisted cpu history for VMware host")
	}
}
|
|
|
|
// TestSyncUnifiedVMMetricsRecordsVMwareVMHistory verifies that
// syncUnifiedVMMetrics records cpu/network history for a VMware VM under its
// canonical "<connection>:vm:<moid>" metrics ID, in both memory and the
// persistent store.
func TestSyncUnifiedVMMetricsRecordsVMwareVMHistory(t *testing.T) {
	// Enable the VMware feature flag for the duration of the test.
	previous := vmware.IsFeatureEnabled()
	vmware.SetFeatureEnabled(true)
	t.Cleanup(func() {
		vmware.SetFeatureEnabled(previous)
	})

	cfg := metrics.DefaultConfig(t.TempDir())
	store, err := metrics.NewStore(cfg)
	if err != nil {
		t.Fatalf("metrics.NewStore() error = %v", err)
	}
	defer func() { _ = store.Close() }()

	// Build one powered-on VM with a full set of metric samples.
	resourceStore := unifiedresources.NewMonitorAdapter(nil)
	records := vmware.NewProvider(vmware.InventorySnapshot{
		ConnectionID:   "vc-1",
		ConnectionName: "Lab VC",
		VCenterHost:    "vc.lab.local",
		CollectedAt:    time.Date(2026, time.March, 30, 18, 15, 0, 0, time.UTC),
		VMs: []vmware.InventoryVM{{
			VM:            "vm-201",
			Name:          "app-01",
			PowerState:    "POWERED_ON",
			CPUCount:      4,
			MemorySizeMiB: 8192,
			Metrics: &vmware.InventoryMetrics{
				CPUPercent:              ptrFloat64(38.1),
				MemoryPercent:           ptrFloat64(57.5),
				MemoryUsedBytes:         ptrInt64(5033164800),
				MemoryTotalBytes:        ptrInt64(8589934592),
				NetInBytesPerSecond:     ptrFloat64(512),
				NetOutBytesPerSecond:    ptrFloat64(768),
				DiskReadBytesPerSecond:  ptrFloat64(1536),
				DiskWriteBytesPerSecond: ptrFloat64(2048),
			},
		}},
	}).Records()
	resourceStore.PopulateSnapshotAndSupplemental(models.StateSnapshot{}, map[unifiedresources.DataSource][]unifiedresources.IngestRecord{
		unifiedresources.SourceVMware: records,
	})

	monitor := &Monitor{
		resourceStore:  resourceStore,
		metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
		metricsStore:   store,
	}

	monitor.syncUnifiedVMMetrics(resourceStore)

	var vmResourceID string
	for _, resource := range resourceStore.GetAll() {
		if resource.Type == unifiedresources.ResourceTypeVM && resource.Name == "app-01" {
			vmResourceID = resource.ID
			break
		}
	}
	if vmResourceID == "" {
		t.Fatal("expected VMware VM resource in unified store")
	}

	target := resourceStore.MetricsTargetForResource(vmResourceID)
	if target == nil || target.ResourceType != "vm" || target.ResourceID != "vc-1:vm:vm-201" {
		t.Fatalf("unexpected vm metrics target %+v", target)
	}

	inMemory := monitor.GetGuestMetrics("vc-1:vm:vm-201", time.Hour)
	if got := len(inMemory["cpu"]); got == 0 {
		t.Fatalf("expected in-memory cpu history for VMware VM")
	}
	if got := len(inMemory["netin"]); got == 0 {
		t.Fatalf("expected in-memory network history for VMware VM")
	}

	storeBacked := monitor.GetGuestMetricsForChart("vc-1:vm:vm-201", "vm", "vc-1:vm:vm-201", 7*24*time.Hour)
	if got := len(storeBacked["cpu"]); got == 0 {
		t.Fatalf("expected persisted cpu history for VMware VM")
	}
}
|
|
|
|
// TestSyncUnifiedPhysicalDiskMetricsRecordsTrueNASDiskHistory verifies that
// syncUnifiedPhysicalDiskMetrics persists temperature history for a TrueNAS
// physical disk (fixture /dev/sdc, serial WD-WX12A3456) attributed to the
// correct node, with the fixture's last temperature of 63.
func TestSyncUnifiedPhysicalDiskMetricsRecordsTrueNASDiskHistory(t *testing.T) {
	// Enable the TrueNAS feature flag for the duration of the test.
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() {
		truenas.SetFeatureEnabled(previous)
	})

	cfg := metrics.DefaultConfig(t.TempDir())
	store, err := metrics.NewStore(cfg)
	if err != nil {
		t.Fatalf("metrics.NewStore() error = %v", err)
	}
	defer func() { _ = store.Close() }()

	// Seed the unified store with the canned TrueNAS fixtures.
	resourceStore := unifiedresources.NewMonitorAdapter(nil)
	records := truenas.NewProvider(truenas.DefaultFixtures()).Records()
	resourceStore.PopulateSnapshotAndSupplemental(models.StateSnapshot{}, map[unifiedresources.DataSource][]unifiedresources.IngestRecord{
		unifiedresources.SourceTrueNAS: records,
	})

	// Note: no metricsHistory here — disk temperature only uses the store.
	monitor := &Monitor{
		resourceStore: resourceStore,
		metricsStore:  store,
	}

	monitor.syncUnifiedPhysicalDiskMetrics(resourceStore)

	// Locate the fixture disk by device path.
	var diskResourceID string
	for _, resource := range resourceStore.GetAll() {
		if resource.Type != unifiedresources.ResourceTypePhysicalDisk || resource.PhysicalDisk == nil {
			continue
		}
		if strings.TrimSpace(resource.PhysicalDisk.DevPath) == "/dev/sdc" {
			diskResourceID = resource.ID
			break
		}
	}
	if diskResourceID == "" {
		t.Fatal("expected TrueNAS sdc disk resource in unified store")
	}

	// The metrics target must key on the disk's serial number.
	target := resourceStore.MetricsTargetForResource(diskResourceID)
	if target == nil || target.ResourceType != "disk" || target.ResourceID != "WD-WX12A3456" {
		t.Fatalf("unexpected physical-disk metrics target %+v", target)
	}

	charts := monitor.GetPhysicalDiskTemperatureCharts(7 * 24 * time.Hour)
	entry, ok := charts[target.ResourceID]
	if !ok {
		t.Fatalf("expected persisted disk chart for %s, got %#v", target.ResourceID, charts)
	}
	if entry.Node != "truenas-main" {
		t.Fatalf("chart node = %q, want truenas-main", entry.Node)
	}
	if len(entry.Temperature) == 0 {
		t.Fatalf("expected temperature history for %s", target.ResourceID)
	}
	last := entry.Temperature[len(entry.Temperature)-1]
	if last.Value != 63 {
		t.Fatalf("expected last disk temperature 63, got %.2f", last.Value)
	}
}
|
|
|
|
// TestSyncUnifiedStorageAndDiskMetricsSkipMockOwnedTrueNASHistoryWhenMockEnabled
// verifies that while mock mode is enabled, TrueNAS-sourced storage and disk
// history is not written to either the in-memory history or the persistent
// metrics store.
func TestSyncUnifiedStorageAndDiskMetricsSkipMockOwnedTrueNASHistoryWhenMockEnabled(t *testing.T) {
	// Enable the TrueNAS feature flag for the test; restore it afterwards.
	previousFeature := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() {
		truenas.SetFeatureEnabled(previousFeature)
	})

	// Enable mock mode for the test; restore it afterwards.
	previousMock := mock.IsMockEnabled()
	mock.SetEnabled(true)
	t.Cleanup(func() {
		mock.SetEnabled(previousMock)
	})

	cfg := metrics.DefaultConfig(t.TempDir())
	store, err := metrics.NewStore(cfg)
	if err != nil {
		t.Fatalf("metrics.NewStore() error = %v", err)
	}
	defer func() { _ = store.Close() }()

	// Seed the unified resource store from the default TrueNAS fixtures.
	resourceStore := unifiedresources.NewMonitorAdapter(nil)
	resourceStore.PopulateSnapshotAndSupplemental(models.StateSnapshot{}, map[unifiedresources.DataSource][]unifiedresources.IngestRecord{
		unifiedresources.SourceTrueNAS: truenas.NewProvider(truenas.DefaultFixtures()).Records(),
	})

	monitor := &Monitor{
		resourceStore:  resourceStore,
		metricsHistory: NewMetricsHistory(1024, 24*time.Hour),
		metricsStore:   store,
	}

	monitor.syncUnifiedStorageMetrics(resourceStore)
	monitor.syncUnifiedPhysicalDiskMetrics(resourceStore)

	// In-memory storage history for the fixture pool must remain empty.
	if got := len(monitor.GetStorageMetrics("pool:tank", time.Hour)["usage"]); got != 0 {
		t.Fatalf("expected mock-owned TrueNAS storage history to be skipped, got %d usage points", got)
	}

	// The persistent store must likewise hold no SMART temperature points.
	points, err := store.Query("disk", "WD-WX12A3456", "smart_temp", time.Now().Add(-time.Hour), time.Now(), 0)
	if err != nil {
		t.Fatalf("store.Query() error = %v", err)
	}
	if len(points) != 0 {
		t.Fatalf("expected mock-owned TrueNAS disk history to be skipped, got %d points", len(points))
	}
}
|
|
|
|
// TestBuildBroadcastFrontendStatePrefersLiveAlertManagerOverSnapshotAlerts
// verifies that when an alert manager is attached, the broadcast frontend
// state carries the manager's live alerts rather than the (possibly stale)
// alerts embedded in the snapshot.
func TestBuildBroadcastFrontendStatePrefersLiveAlertManagerOverSnapshotAlerts(t *testing.T) {
	alertManager := alerts.NewManagerWithDataDir(t.TempDir())
	defer alertManager.Stop()

	// Seed the manager with one live incident on a TrueNAS agent resource.
	alertManager.SyncUnifiedResourceIncidents([]unifiedresources.Resource{{
		ID:      "agent:truenas-main",
		Type:    unifiedresources.ResourceTypeAgent,
		Name:    "truenas-main",
		Sources: []unifiedresources.DataSource{unifiedresources.SourceTrueNAS},
		TrueNAS: &unifiedresources.TrueNASData{Hostname: "truenas-main"},
		Incidents: []unifiedresources.ResourceIncident{{
			Provider: "truenas",
			NativeID: "alert-live-1",
			Code:     "truenas_volume_status",
			Severity: storagehealth.RiskCritical,
			Summary:  "Pool tank is FAULTED",
		}},
	}})

	monitor := &Monitor{
		state:        models.NewState(),
		alertManager: alertManager,
	}

	// The snapshot deliberately carries a different, stale alert that should
	// be superseded by the live manager's view.
	staleSnapshot := models.StateSnapshot{
		ActiveAlerts: []models.Alert{{
			ID:           "snapshot-stale-1",
			Type:         "offline",
			ResourceID:   "stale-resource",
			ResourceName: "stale-resource",
			Message:      "stale snapshot alert",
		}},
	}

	frontend := monitor.buildBroadcastFrontendStateFromSnapshot(staleSnapshot)

	if len(frontend.ActiveAlerts) != 1 {
		t.Fatalf("expected 1 live active alert in frontend state, got %d", len(frontend.ActiveAlerts))
	}
	if got := frontend.ActiveAlerts[0].ID; got == "snapshot-stale-1" || got == "" {
		t.Fatalf("expected live alert manager id, got %q", got)
	}
	if got := frontend.ActiveAlerts[0].Type; got != "resource-incident" {
		t.Fatalf("alert type = %q, want resource-incident", got)
	}
	if got := frontend.ActiveAlerts[0].Message; got == "stale snapshot alert" {
		t.Fatalf("expected live alert manager payload, got stale snapshot message %q", got)
	}
}
|
|
|
|
// TestActiveAlertsSnapshotPrefersLiveAlertManagerOverStateSnapshot verifies
// that ActiveAlertsSnapshot returns the alert manager's live alerts instead
// of the stale alerts stored in the monitor state.
func TestActiveAlertsSnapshotPrefersLiveAlertManagerOverStateSnapshot(t *testing.T) {
	alertManager := alerts.NewManagerWithDataDir(t.TempDir())
	defer alertManager.Stop()

	// Seed the manager with one live incident on a TrueNAS agent resource.
	alertManager.SyncUnifiedResourceIncidents([]unifiedresources.Resource{{
		ID:      "agent:truenas-live",
		Type:    unifiedresources.ResourceTypeAgent,
		Name:    "truenas-live",
		Sources: []unifiedresources.DataSource{unifiedresources.SourceTrueNAS},
		TrueNAS: &unifiedresources.TrueNASData{Hostname: "truenas-live"},
		Incidents: []unifiedresources.ResourceIncident{{
			Provider: "truenas",
			NativeID: "live-active-1",
			Code:     "truenas_volume_status",
			Severity: storagehealth.RiskCritical,
			Summary:  "Pool tank is FAULTED",
		}},
	}})

	// The state carries a different, stale active alert that the live
	// manager's view should replace.
	state := models.NewState()
	state.UpdateActiveAlerts([]models.Alert{{
		ID:           "snapshot-active-1",
		Type:         "offline",
		ResourceID:   "snapshot-resource",
		ResourceName: "snapshot-resource",
		Message:      "stale snapshot active alert",
	}})

	monitor := &Monitor{
		state:        state,
		alertManager: alertManager,
	}

	active := monitor.ActiveAlertsSnapshot()
	if len(active) != 1 {
		t.Fatalf("expected 1 live active alert, got %d", len(active))
	}
	if got := active[0].ID; got == "snapshot-active-1" || got == "" {
		t.Fatalf("expected live alert manager alert id, got %q", got)
	}
	if got := active[0].Type; got != "resource-incident" {
		t.Fatalf("alert type = %q, want resource-incident", got)
	}
}
|
|
|
|
// TestRecentlyResolvedSnapshotPrefersLiveAlertManagerOverStateSnapshot
// verifies that RecentlyResolvedSnapshot surfaces alerts resolved by the live
// alert manager (raised then cleared via incident sync) instead of the stale
// resolved alerts stored in the monitor state.
func TestRecentlyResolvedSnapshotPrefersLiveAlertManagerOverStateSnapshot(t *testing.T) {
	alertManager := alerts.NewManagerWithDataDir(t.TempDir())
	defer alertManager.Stop()

	resource := unifiedresources.Resource{
		ID:      "agent:truenas-live",
		Type:    unifiedresources.ResourceTypeAgent,
		Name:    "truenas-live",
		Sources: []unifiedresources.DataSource{unifiedresources.SourceTrueNAS},
		TrueNAS: &unifiedresources.TrueNASData{Hostname: "truenas-live"},
		Incidents: []unifiedresources.ResourceIncident{{
			Provider: "truenas",
			NativeID: "live-resolved-1",
			Code:     "truenas_volume_status",
			Severity: storagehealth.RiskCritical,
			Summary:  "Pool tank is FAULTED",
		}},
	}
	// Raise the incident, then sync an empty set so it resolves.
	alertManager.SyncUnifiedResourceIncidents([]unifiedresources.Resource{resource})
	alertManager.SyncUnifiedResourceIncidents(nil)

	// The state carries a different, stale resolved alert.
	state := models.NewState()
	state.UpdateRecentlyResolved([]models.ResolvedAlert{{
		Alert: models.Alert{
			ID:           "snapshot-resolved-1",
			Type:         "offline",
			ResourceID:   "snapshot-resource",
			ResourceName: "snapshot-resource",
			Message:      "stale snapshot resolved alert",
		},
		ResolvedTime: time.Now().UTC(),
	}})

	monitor := &Monitor{
		state:        state,
		alertManager: alertManager,
	}

	resolved := monitor.RecentlyResolvedSnapshot()
	if len(resolved) == 0 {
		t.Fatal("expected recently resolved alert from live alert manager")
	}
	if got := resolved[0].ID; got == "snapshot-resolved-1" || got == "" {
		t.Fatalf("expected live resolved alert id, got %q", got)
	}
	if got := resolved[0].Type; got != "resource-incident" {
		t.Fatalf("resolved alert type = %q, want resource-incident", got)
	}
}
|
|
|
|
func TestBuildBroadcastFrontendStatePreservesSnapshotAlertsWithoutAlertManager(t *testing.T) {
|
|
monitor := &Monitor{
|
|
state: models.NewState(),
|
|
}
|
|
|
|
snapshot := models.StateSnapshot{
|
|
ActiveAlerts: []models.Alert{{
|
|
ID: "snapshot-only-1",
|
|
Type: "offline",
|
|
ResourceID: "snapshot-resource",
|
|
ResourceName: "snapshot-resource",
|
|
Message: "snapshot-only alert",
|
|
}},
|
|
}
|
|
|
|
frontend := monitor.buildBroadcastFrontendStateFromSnapshot(snapshot)
|
|
if len(frontend.ActiveAlerts) != 1 {
|
|
t.Fatalf("expected snapshot alert to be preserved, got %#v", frontend.ActiveAlerts)
|
|
}
|
|
if got := frontend.ActiveAlerts[0].ID; got != "snapshot-only-1" {
|
|
t.Fatalf("alert id = %q, want snapshot-only-1", got)
|
|
}
|
|
}
|
|
|
|
func TestRecordTaskResult_Success(t *testing.T) {
|
|
m := &Monitor{
|
|
pollStatusMap: make(map[string]*pollStatus),
|
|
failureCounts: make(map[string]int),
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
circuitBreakers: make(map[string]*circuitBreaker),
|
|
}
|
|
|
|
// Record a success
|
|
m.recordTaskResult(InstanceTypePVE, "test-instance", nil)
|
|
|
|
key := schedulerKey(InstanceTypePVE, "test-instance")
|
|
|
|
// Verify failure count is reset
|
|
if m.failureCounts[key] != 0 {
|
|
t.Errorf("expected failureCounts[%s] = 0, got %d", key, m.failureCounts[key])
|
|
}
|
|
|
|
// Verify last outcome is success
|
|
outcome, ok := m.lastOutcome[key]
|
|
if !ok {
|
|
t.Fatalf("expected lastOutcome[%s] to exist", key)
|
|
}
|
|
if !outcome.success {
|
|
t.Error("expected outcome.success = true")
|
|
}
|
|
|
|
// Verify poll status
|
|
status, ok := m.pollStatusMap[key]
|
|
if !ok {
|
|
t.Fatalf("expected pollStatusMap[%s] to exist", key)
|
|
}
|
|
if status.ConsecutiveFailures != 0 {
|
|
t.Errorf("expected ConsecutiveFailures = 0, got %d", status.ConsecutiveFailures)
|
|
}
|
|
}
|
|
|
|
func TestRecordTaskResult_Failure(t *testing.T) {
|
|
m := &Monitor{
|
|
pollStatusMap: make(map[string]*pollStatus),
|
|
failureCounts: make(map[string]int),
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
circuitBreakers: make(map[string]*circuitBreaker),
|
|
}
|
|
|
|
testErr := errors.New("connection refused")
|
|
|
|
// Record a failure
|
|
m.recordTaskResult(InstanceTypePVE, "test-instance", testErr)
|
|
|
|
key := schedulerKey(InstanceTypePVE, "test-instance")
|
|
|
|
// Verify failure count is incremented
|
|
if m.failureCounts[key] != 1 {
|
|
t.Errorf("expected failureCounts[%s] = 1, got %d", key, m.failureCounts[key])
|
|
}
|
|
|
|
// Verify last outcome is failure
|
|
outcome, ok := m.lastOutcome[key]
|
|
if !ok {
|
|
t.Fatalf("expected lastOutcome[%s] to exist", key)
|
|
}
|
|
if outcome.success {
|
|
t.Error("expected outcome.success = false")
|
|
}
|
|
if outcome.err != testErr {
|
|
t.Errorf("expected outcome.err = %v, got %v", testErr, outcome.err)
|
|
}
|
|
|
|
// Verify poll status
|
|
status, ok := m.pollStatusMap[key]
|
|
if !ok {
|
|
t.Fatalf("expected pollStatusMap[%s] to exist", key)
|
|
}
|
|
if status.ConsecutiveFailures != 1 {
|
|
t.Errorf("expected ConsecutiveFailures = 1, got %d", status.ConsecutiveFailures)
|
|
}
|
|
if status.LastErrorMessage != "connection refused" {
|
|
t.Errorf("expected LastErrorMessage = 'connection refused', got %q", status.LastErrorMessage)
|
|
}
|
|
}
|
|
|
|
func TestRecordTaskResult_ConsecutiveFailures(t *testing.T) {
|
|
m := &Monitor{
|
|
pollStatusMap: make(map[string]*pollStatus),
|
|
failureCounts: make(map[string]int),
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
circuitBreakers: make(map[string]*circuitBreaker),
|
|
}
|
|
|
|
testErr := errors.New("timeout")
|
|
|
|
// Record multiple failures
|
|
m.recordTaskResult(InstanceTypePBS, "pbs-server", testErr)
|
|
m.recordTaskResult(InstanceTypePBS, "pbs-server", testErr)
|
|
m.recordTaskResult(InstanceTypePBS, "pbs-server", testErr)
|
|
|
|
key := schedulerKey(InstanceTypePBS, "pbs-server")
|
|
|
|
// Verify consecutive failures count
|
|
status := m.pollStatusMap[key]
|
|
if status.ConsecutiveFailures != 3 {
|
|
t.Errorf("expected ConsecutiveFailures = 3, got %d", status.ConsecutiveFailures)
|
|
}
|
|
|
|
// FirstFailureAt should be set on first failure and not change
|
|
if status.FirstFailureAt.IsZero() {
|
|
t.Error("expected FirstFailureAt to be set")
|
|
}
|
|
}
|
|
|
|
func TestRecordTaskResult_SuccessResetsFailures(t *testing.T) {
|
|
m := &Monitor{
|
|
pollStatusMap: make(map[string]*pollStatus),
|
|
failureCounts: make(map[string]int),
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
circuitBreakers: make(map[string]*circuitBreaker),
|
|
}
|
|
|
|
testErr := errors.New("error")
|
|
key := schedulerKey(InstanceTypePMG, "pmg-server")
|
|
|
|
// Record some failures first
|
|
m.recordTaskResult(InstanceTypePMG, "pmg-server", testErr)
|
|
m.recordTaskResult(InstanceTypePMG, "pmg-server", testErr)
|
|
|
|
if m.pollStatusMap[key].ConsecutiveFailures != 2 {
|
|
t.Fatalf("expected 2 failures before reset")
|
|
}
|
|
|
|
// Now record a success
|
|
m.recordTaskResult(InstanceTypePMG, "pmg-server", nil)
|
|
|
|
// Verify everything is reset
|
|
if m.failureCounts[key] != 0 {
|
|
t.Errorf("expected failureCounts to be reset to 0, got %d", m.failureCounts[key])
|
|
}
|
|
if m.pollStatusMap[key].ConsecutiveFailures != 0 {
|
|
t.Errorf("expected ConsecutiveFailures to be reset to 0, got %d", m.pollStatusMap[key].ConsecutiveFailures)
|
|
}
|
|
if !m.pollStatusMap[key].FirstFailureAt.IsZero() {
|
|
t.Error("expected FirstFailureAt to be reset to zero")
|
|
}
|
|
}
|
|
|
|
func TestRecordTaskResult_NilMaps(t *testing.T) {
|
|
// Monitor with nil internal maps - should not panic
|
|
m := &Monitor{
|
|
pollStatusMap: make(map[string]*pollStatus),
|
|
failureCounts: nil, // nil
|
|
lastOutcome: nil, // nil
|
|
circuitBreakers: make(map[string]*circuitBreaker),
|
|
}
|
|
|
|
// Should not panic
|
|
m.recordTaskResult(InstanceTypePVE, "test", nil)
|
|
m.recordTaskResult(InstanceTypePVE, "test", errors.New("error"))
|
|
|
|
// pollStatusMap should still be updated
|
|
key := schedulerKey(InstanceTypePVE, "test")
|
|
if _, ok := m.pollStatusMap[key]; !ok {
|
|
t.Error("expected pollStatusMap to be updated even with nil failureCounts/lastOutcome")
|
|
}
|
|
}
|
|
|
|
func TestDescribeInstancesForScheduler_NoClients(t *testing.T) {
|
|
m := &Monitor{
|
|
pveClients: make(map[string]PVEClientInterface),
|
|
pbsClients: make(map[string]*pbs.Client),
|
|
pmgClients: make(map[string]*pmg.Client),
|
|
}
|
|
|
|
descriptors := m.describeInstancesForScheduler()
|
|
if descriptors != nil {
|
|
t.Errorf("expected nil for empty clients, got %v", descriptors)
|
|
}
|
|
}
|
|
|
|
func TestDescribeInstancesForScheduler_PVEOnly(t *testing.T) {
|
|
m := &Monitor{
|
|
pveClients: map[string]PVEClientInterface{"pve-1": nil, "pve-2": nil},
|
|
pbsClients: make(map[string]*pbs.Client),
|
|
pmgClients: make(map[string]*pmg.Client),
|
|
}
|
|
|
|
descriptors := m.describeInstancesForScheduler()
|
|
if len(descriptors) != 2 {
|
|
t.Fatalf("expected 2 descriptors, got %d", len(descriptors))
|
|
}
|
|
|
|
// Should be sorted
|
|
if descriptors[0].Name != "pve-1" || descriptors[1].Name != "pve-2" {
|
|
t.Errorf("expected sorted order [pve-1, pve-2], got [%s, %s]", descriptors[0].Name, descriptors[1].Name)
|
|
}
|
|
|
|
for _, desc := range descriptors {
|
|
if desc.Type != InstanceTypePVE {
|
|
t.Errorf("expected type PVE, got %v", desc.Type)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestDescribeInstancesForScheduler_PBSOnly(t *testing.T) {
|
|
m := &Monitor{
|
|
pveClients: make(map[string]PVEClientInterface),
|
|
pbsClients: map[string]*pbs.Client{"pbs-backup": nil},
|
|
pmgClients: make(map[string]*pmg.Client),
|
|
}
|
|
|
|
descriptors := m.describeInstancesForScheduler()
|
|
if len(descriptors) != 1 {
|
|
t.Fatalf("expected 1 descriptor, got %d", len(descriptors))
|
|
}
|
|
|
|
if descriptors[0].Name != "pbs-backup" {
|
|
t.Errorf("expected name 'pbs-backup', got %q", descriptors[0].Name)
|
|
}
|
|
if descriptors[0].Type != InstanceTypePBS {
|
|
t.Errorf("expected type PBS, got %v", descriptors[0].Type)
|
|
}
|
|
}
|
|
|
|
func TestDescribeInstancesForScheduler_PMGOnly(t *testing.T) {
|
|
m := &Monitor{
|
|
pveClients: make(map[string]PVEClientInterface),
|
|
pbsClients: make(map[string]*pbs.Client),
|
|
pmgClients: map[string]*pmg.Client{"pmg-mail": nil},
|
|
}
|
|
|
|
descriptors := m.describeInstancesForScheduler()
|
|
if len(descriptors) != 1 {
|
|
t.Fatalf("expected 1 descriptor, got %d", len(descriptors))
|
|
}
|
|
|
|
if descriptors[0].Name != "pmg-mail" {
|
|
t.Errorf("expected name 'pmg-mail', got %q", descriptors[0].Name)
|
|
}
|
|
if descriptors[0].Type != InstanceTypePMG {
|
|
t.Errorf("expected type PMG, got %v", descriptors[0].Type)
|
|
}
|
|
}
|
|
|
|
func TestDescribeInstancesForScheduler_AllTypes(t *testing.T) {
|
|
m := &Monitor{
|
|
pveClients: map[string]PVEClientInterface{"pve-1": nil},
|
|
pbsClients: map[string]*pbs.Client{"pbs-1": nil},
|
|
pmgClients: map[string]*pmg.Client{"pmg-1": nil},
|
|
}
|
|
|
|
descriptors := m.describeInstancesForScheduler()
|
|
if len(descriptors) != 3 {
|
|
t.Fatalf("expected 3 descriptors, got %d", len(descriptors))
|
|
}
|
|
|
|
// Check we have one of each type
|
|
types := make(map[InstanceType]bool)
|
|
for _, desc := range descriptors {
|
|
types[desc.Type] = true
|
|
}
|
|
if !types[InstanceTypePVE] || !types[InstanceTypePBS] || !types[InstanceTypePMG] {
|
|
t.Error("expected one descriptor of each type")
|
|
}
|
|
}
|
|
|
|
func TestDescribeInstancesForScheduler_NilSchedulerAndTracker(t *testing.T) {
|
|
m := &Monitor{
|
|
pveClients: map[string]PVEClientInterface{"pve-1": nil},
|
|
pbsClients: make(map[string]*pbs.Client),
|
|
pmgClients: make(map[string]*pmg.Client),
|
|
scheduler: nil, // explicitly nil
|
|
stalenessTracker: nil, // explicitly nil
|
|
}
|
|
|
|
// Should not panic with nil scheduler and stalenessTracker
|
|
descriptors := m.describeInstancesForScheduler()
|
|
if len(descriptors) != 1 {
|
|
t.Fatalf("expected 1 descriptor, got %d", len(descriptors))
|
|
}
|
|
|
|
// LastScheduled and LastSuccess should be zero values
|
|
if !descriptors[0].LastScheduled.IsZero() {
|
|
t.Error("expected LastScheduled to be zero with nil scheduler")
|
|
}
|
|
if !descriptors[0].LastSuccess.IsZero() {
|
|
t.Error("expected LastSuccess to be zero with nil stalenessTracker")
|
|
}
|
|
}
|
|
|
|
// TestRescheduleTask_SuccessfulOutcome verifies that after a successful poll
// the task is requeued at its regular interval with no backoff applied.
func TestRescheduleTask_SuccessfulOutcome(t *testing.T) {
	cfg := &config.Config{
		PVEPollingInterval:          30 * time.Second,
		AdaptivePollingBaseInterval: 10 * time.Second,
	}

	m := &Monitor{
		config:        cfg,
		taskQueue:     NewTaskQueue(),
		lastOutcome:   make(map[string]taskOutcome),
		failureCounts: make(map[string]int),
	}

	task := ScheduledTask{
		InstanceName: "pve-1",
		InstanceType: InstanceTypePVE,
		Interval:     30 * time.Second,
		NextRun:      time.Now(),
	}

	key := schedulerKey(task.InstanceType, task.InstanceName)

	// Record a successful outcome so rescheduling takes the no-backoff path.
	m.lastOutcome[key] = taskOutcome{success: true}

	m.rescheduleTask(task)

	// Task should be rescheduled at regular interval (no backoff).
	m.taskQueue.mu.Lock()
	entry, ok := m.taskQueue.entries[key]
	m.taskQueue.mu.Unlock()

	if !ok {
		t.Fatal("expected task to be rescheduled")
	}

	// With no adaptive scheduler attached, the configured PVE polling
	// interval is the expected reschedule interval.
	if entry.task.Interval != cfg.PVEPollingInterval {
		t.Errorf("expected interval %v, got %v", cfg.PVEPollingInterval, entry.task.Interval)
	}
}
|
|
|
|
// TestRescheduleTask_TransientFailureWithBackoff verifies that a transient
// failure below the retry limit causes the task to be requeued with a
// positive backoff interval rather than dropped.
func TestRescheduleTask_TransientFailureWithBackoff(t *testing.T) {
	cfg := &config.Config{
		PVEPollingInterval:          30 * time.Second,
		AdaptivePollingBaseInterval: 10 * time.Second,
	}

	m := &Monitor{
		config:           cfg,
		taskQueue:        NewTaskQueue(),
		lastOutcome:      make(map[string]taskOutcome),
		failureCounts:    make(map[string]int),
		maxRetryAttempts: 5,
		backoffCfg: backoffConfig{
			Initial:    5 * time.Second,
			Multiplier: 2,
			Jitter:     0, // no jitter for predictable testing
			Max:        5 * time.Minute,
		},
	}

	// A nil rng means the backoff calculation falls back to its default
	// random source (irrelevant here since Jitter is 0).
	m.rng = nil

	task := ScheduledTask{
		InstanceName: "pve-1",
		InstanceType: InstanceTypePVE,
		Interval:     30 * time.Second,
		NextRun:      time.Now(),
	}

	key := schedulerKey(task.InstanceType, task.InstanceName)

	// Record a transient failure (1st attempt, below maxRetryAttempts).
	m.failureCounts[key] = 1
	m.lastOutcome[key] = taskOutcome{
		success:   false,
		transient: true,
		err:       errors.New("connection timeout"),
	}

	m.rescheduleTask(task)

	// Task should be rescheduled with a backoff delay.
	m.taskQueue.mu.Lock()
	entry, ok := m.taskQueue.entries[key]
	m.taskQueue.mu.Unlock()

	if !ok {
		t.Fatal("expected task to be rescheduled with backoff")
	}

	// With backoff applied, the interval should be a positive delay.
	if entry.task.Interval <= 0 {
		t.Errorf("expected positive backoff interval, got %v", entry.task.Interval)
	}
}
|
|
|
|
func TestRescheduleTask_NonTransientFailureGoesToDeadLetter(t *testing.T) {
|
|
cfg := &config.Config{
|
|
PVEPollingInterval: 30 * time.Second,
|
|
}
|
|
|
|
deadLetterQ := NewTaskQueue()
|
|
|
|
m := &Monitor{
|
|
config: cfg,
|
|
taskQueue: NewTaskQueue(),
|
|
deadLetterQueue: deadLetterQ,
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
failureCounts: make(map[string]int),
|
|
maxRetryAttempts: 5,
|
|
}
|
|
|
|
task := ScheduledTask{
|
|
InstanceName: "pve-1",
|
|
InstanceType: InstanceTypePVE,
|
|
Interval: 30 * time.Second,
|
|
NextRun: time.Now(),
|
|
}
|
|
|
|
key := schedulerKey(task.InstanceType, task.InstanceName)
|
|
|
|
// Record a non-transient failure (permanent error)
|
|
m.failureCounts[key] = 1
|
|
m.lastOutcome[key] = taskOutcome{
|
|
success: false,
|
|
transient: false, // non-transient
|
|
err: errors.New("authentication failed"),
|
|
}
|
|
|
|
m.rescheduleTask(task)
|
|
|
|
// Task should NOT be in the main queue
|
|
m.taskQueue.mu.Lock()
|
|
_, inMainQueue := m.taskQueue.entries[key]
|
|
m.taskQueue.mu.Unlock()
|
|
|
|
if inMainQueue {
|
|
t.Error("expected task to NOT be in main queue after non-transient failure")
|
|
}
|
|
|
|
// Task should be in dead letter queue
|
|
deadLetterQ.mu.Lock()
|
|
dlqSize := len(deadLetterQ.entries)
|
|
deadLetterQ.mu.Unlock()
|
|
|
|
if dlqSize != 1 {
|
|
t.Errorf("expected 1 task in dead letter queue, got %d", dlqSize)
|
|
}
|
|
}
|
|
|
|
func TestRescheduleTask_ExceededRetryAttemptsGoesToDeadLetter(t *testing.T) {
|
|
cfg := &config.Config{
|
|
PVEPollingInterval: 30 * time.Second,
|
|
}
|
|
|
|
deadLetterQ := NewTaskQueue()
|
|
|
|
m := &Monitor{
|
|
config: cfg,
|
|
taskQueue: NewTaskQueue(),
|
|
deadLetterQueue: deadLetterQ,
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
failureCounts: make(map[string]int),
|
|
maxRetryAttempts: 3,
|
|
}
|
|
|
|
task := ScheduledTask{
|
|
InstanceName: "pve-1",
|
|
InstanceType: InstanceTypePVE,
|
|
Interval: 30 * time.Second,
|
|
NextRun: time.Now(),
|
|
}
|
|
|
|
key := schedulerKey(task.InstanceType, task.InstanceName)
|
|
|
|
// Exceed max retry attempts (failureCount >= maxRetryAttempts)
|
|
m.failureCounts[key] = 3
|
|
m.lastOutcome[key] = taskOutcome{
|
|
success: false,
|
|
transient: true, // transient, but exceeded retries
|
|
err: errors.New("connection timeout"),
|
|
}
|
|
|
|
m.rescheduleTask(task)
|
|
|
|
// Task should be in dead letter queue
|
|
deadLetterQ.mu.Lock()
|
|
dlqSize := len(deadLetterQ.entries)
|
|
deadLetterQ.mu.Unlock()
|
|
|
|
if dlqSize != 1 {
|
|
t.Errorf("expected 1 task in dead letter queue after exceeding retries, got %d", dlqSize)
|
|
}
|
|
}
|
|
|
|
func TestRescheduleTask_NoOutcomeUsesDefaultInterval(t *testing.T) {
|
|
cfg := &config.Config{
|
|
PVEPollingInterval: 45 * time.Second,
|
|
AdaptivePollingBaseInterval: 10 * time.Second,
|
|
}
|
|
|
|
m := &Monitor{
|
|
config: cfg,
|
|
taskQueue: NewTaskQueue(),
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
failureCounts: make(map[string]int),
|
|
}
|
|
|
|
task := ScheduledTask{
|
|
InstanceName: "pve-1",
|
|
InstanceType: InstanceTypePVE,
|
|
Interval: 0, // no interval set
|
|
NextRun: time.Now(),
|
|
}
|
|
|
|
key := schedulerKey(task.InstanceType, task.InstanceName)
|
|
|
|
// No outcome recorded - hasOutcome will be false
|
|
m.rescheduleTask(task)
|
|
|
|
m.taskQueue.mu.Lock()
|
|
entry, ok := m.taskQueue.entries[key]
|
|
m.taskQueue.mu.Unlock()
|
|
|
|
if !ok {
|
|
t.Fatal("expected task to be rescheduled")
|
|
}
|
|
|
|
// Should use config PVE polling interval
|
|
if entry.task.Interval != cfg.PVEPollingInterval {
|
|
t.Errorf("expected interval %v, got %v", cfg.PVEPollingInterval, entry.task.Interval)
|
|
}
|
|
}
|
|
|
|
func TestRescheduleTask_PBSInstance(t *testing.T) {
|
|
cfg := &config.Config{
|
|
PBSPollingInterval: 60 * time.Second,
|
|
AdaptivePollingBaseInterval: 10 * time.Second,
|
|
}
|
|
|
|
m := &Monitor{
|
|
config: cfg,
|
|
taskQueue: NewTaskQueue(),
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
failureCounts: make(map[string]int),
|
|
}
|
|
|
|
task := ScheduledTask{
|
|
InstanceName: "pbs-1",
|
|
InstanceType: InstanceTypePBS,
|
|
Interval: 0,
|
|
NextRun: time.Now(),
|
|
}
|
|
|
|
key := schedulerKey(task.InstanceType, task.InstanceName)
|
|
|
|
m.rescheduleTask(task)
|
|
|
|
m.taskQueue.mu.Lock()
|
|
entry, ok := m.taskQueue.entries[key]
|
|
m.taskQueue.mu.Unlock()
|
|
|
|
if !ok {
|
|
t.Fatal("expected PBS task to be rescheduled")
|
|
}
|
|
|
|
if entry.task.Interval != cfg.PBSPollingInterval {
|
|
t.Errorf("expected PBS interval %v, got %v", cfg.PBSPollingInterval, entry.task.Interval)
|
|
}
|
|
}
|
|
|
|
func TestRescheduleTask_PMGInstance(t *testing.T) {
|
|
cfg := &config.Config{
|
|
PMGPollingInterval: 90 * time.Second,
|
|
AdaptivePollingBaseInterval: 10 * time.Second,
|
|
}
|
|
|
|
m := &Monitor{
|
|
config: cfg,
|
|
taskQueue: NewTaskQueue(),
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
failureCounts: make(map[string]int),
|
|
}
|
|
|
|
task := ScheduledTask{
|
|
InstanceName: "pmg-1",
|
|
InstanceType: InstanceTypePMG,
|
|
Interval: 0,
|
|
NextRun: time.Now(),
|
|
}
|
|
|
|
key := schedulerKey(task.InstanceType, task.InstanceName)
|
|
|
|
m.rescheduleTask(task)
|
|
|
|
m.taskQueue.mu.Lock()
|
|
entry, ok := m.taskQueue.entries[key]
|
|
m.taskQueue.mu.Unlock()
|
|
|
|
if !ok {
|
|
t.Fatal("expected PMG task to be rescheduled")
|
|
}
|
|
|
|
if entry.task.Interval != cfg.PMGPollingInterval {
|
|
t.Errorf("expected PMG interval %v, got %v", cfg.PMGPollingInterval, entry.task.Interval)
|
|
}
|
|
}
|
|
|
|
// TestRescheduleTask_AdaptivePollingMaxIntervalLimit verifies that when
// adaptive polling is enabled with a small max interval (<= 15s), the backoff
// delay applied after a transient failure is capped at 4s.
func TestRescheduleTask_AdaptivePollingMaxIntervalLimit(t *testing.T) {
	cfg := &config.Config{
		PVEPollingInterval:          30 * time.Second,
		AdaptivePollingEnabled:      true,
		AdaptivePollingMaxInterval:  10 * time.Second, // <= 15s enables capping
		AdaptivePollingBaseInterval: 5 * time.Second,
	}

	m := &Monitor{
		config:           cfg,
		taskQueue:        NewTaskQueue(),
		lastOutcome:      make(map[string]taskOutcome),
		failureCounts:    make(map[string]int),
		maxRetryAttempts: 5,
		backoffCfg: backoffConfig{
			Initial:    10 * time.Second, // would normally backoff to 10s+
			Multiplier: 2,
			Jitter:     0,
			Max:        5 * time.Minute,
		},
	}

	task := ScheduledTask{
		InstanceName: "pve-1",
		InstanceType: InstanceTypePVE,
		Interval:     30 * time.Second,
		NextRun:      time.Now(),
	}

	key := schedulerKey(task.InstanceType, task.InstanceName)

	// Simulate a transient failure to trigger the backoff path.
	m.failureCounts[key] = 1
	m.lastOutcome[key] = taskOutcome{
		success:   false,
		transient: true,
		err:       errors.New("timeout"),
	}

	m.rescheduleTask(task)

	m.taskQueue.mu.Lock()
	entry, ok := m.taskQueue.entries[key]
	m.taskQueue.mu.Unlock()

	if !ok {
		t.Fatal("expected task to be rescheduled")
	}

	// With AdaptivePollingMaxInterval <= 15s, backoff delay should be capped
	// at 4s even though the raw backoff would be 10s or more.
	maxDelay := 4 * time.Second
	if entry.task.Interval > maxDelay {
		t.Errorf("expected backoff interval to be capped at %v, got %v", maxDelay, entry.task.Interval)
	}
}
|
|
|
|
func TestRescheduleTask_UsesExistingIntervalWhenSet(t *testing.T) {
|
|
cfg := &config.Config{
|
|
PVEPollingInterval: 30 * time.Second,
|
|
AdaptivePollingBaseInterval: 10 * time.Second,
|
|
}
|
|
|
|
m := &Monitor{
|
|
config: cfg,
|
|
taskQueue: NewTaskQueue(),
|
|
lastOutcome: make(map[string]taskOutcome),
|
|
failureCounts: make(map[string]int),
|
|
}
|
|
|
|
customInterval := 45 * time.Second
|
|
task := ScheduledTask{
|
|
InstanceName: "pve-1",
|
|
InstanceType: InstanceTypePVE,
|
|
Interval: customInterval, // custom interval already set
|
|
NextRun: time.Now(),
|
|
}
|
|
|
|
key := schedulerKey(task.InstanceType, task.InstanceName)
|
|
|
|
m.rescheduleTask(task)
|
|
|
|
m.taskQueue.mu.Lock()
|
|
entry, ok := m.taskQueue.entries[key]
|
|
m.taskQueue.mu.Unlock()
|
|
|
|
if !ok {
|
|
t.Fatal("expected task to be rescheduled")
|
|
}
|
|
|
|
// Should use the existing interval when it's already set
|
|
if entry.task.Interval != customInterval {
|
|
t.Errorf("expected existing interval %v to be preserved, got %v", customInterval, entry.task.Interval)
|
|
}
|
|
}
|
|
|
|
// TestCustomPollProviderIntegration verifies end-to-end that a registered
// custom poll provider is scheduled with its own type, name, and interval and
// that executing the scheduled task runs the provider's poll callback.
func TestCustomPollProviderIntegration(t *testing.T) {
	const customType InstanceType = "xcp"
	const customName = "xcp-cluster-1"
	customInterval := 42 * time.Second

	// Set to true by the provider's Run callback when executed.
	var executed atomic.Bool

	monitor := &Monitor{
		taskQueue: NewTaskQueue(),
	}
	if err := monitor.RegisterPollProvider(testPollProvider{
		providerType: customType,
		instances:    []string{customName},
		interval:     customInterval,
		buildPollTask: func(instanceName string) (PollTask, error) {
			return PollTask{
				InstanceName: instanceName,
				InstanceType: string(customType),
				Run: func(context.Context) {
					executed.Store(true)
				},
			}, nil
		},
	}); err != nil {
		t.Fatalf("RegisterPollProvider failed: %v", err)
	}
	monitor.SetExecutor(nil) // restore default executor

	tasks := monitor.buildScheduledTasks(time.Now())
	if len(tasks) != 1 {
		t.Fatalf("expected 1 scheduled task for custom provider, got %d", len(tasks))
	}
	task := tasks[0]
	if task.InstanceType != customType {
		t.Fatalf("expected custom instance type %q, got %q", customType, task.InstanceType)
	}
	if task.InstanceName != customName {
		t.Fatalf("expected custom instance name %q, got %q", customName, task.InstanceName)
	}
	if task.Interval != customInterval {
		t.Fatalf("expected custom interval %v, got %v", customInterval, task.Interval)
	}

	// Executing the scheduled task must invoke the provider's callback.
	monitor.executeScheduledTask(context.Background(), task)
	if !executed.Load() {
		t.Fatal("expected custom provider poll task callback to execute")
	}
}
|
|
|
|
// TestUpdateResourceStore_IngestsSupplementalRecords verifies that
// updateResourceStore populates the resource store from the snapshot and also
// ingests supplemental records supplied by a registered poll provider for the
// default org.
func TestUpdateResourceStore_IngestsSupplementalRecords(t *testing.T) {
	store := &testSupplementalResourceStore{}
	provider := &testSupplementalPollProvider{
		testPollProvider: testPollProvider{
			providerType: InstanceType("xcp"),
			instances:    []string{"xcp-cluster-1"},
			interval:     30 * time.Second,
		},
		source: unifiedresources.DataSource("xcp"),
		recordsByOrg: map[string][]unifiedresources.IngestRecord{
			"default": {
				{
					SourceID: "xcp-host-1",
					Resource: unifiedresources.Resource{
						Type:     unifiedresources.ResourceTypeAgent,
						Name:     "xcp-host-1",
						Status:   unifiedresources.StatusOnline,
						LastSeen: time.Now().UTC(),
					},
					Identity: unifiedresources.ResourceIdentity{Hostnames: []string{"xcp-host-1"}},
				},
			},
		},
	}

	monitor := &Monitor{
		resourceStore: store,
	}
	if err := monitor.RegisterPollProvider(provider); err != nil {
		t.Fatalf("RegisterPollProvider failed: %v", err)
	}

	monitor.updateResourceStore(models.StateSnapshot{})

	// The snapshot population must happen exactly once.
	if store.snapshotCalls != 1 {
		t.Fatalf("expected PopulateFromSnapshot to be called once, got %d", store.snapshotCalls)
	}
	// Supplemental records are requested for the default org.
	if provider.lastRequestedOrg != "default" {
		t.Fatalf("expected default org lookup, got %q", provider.lastRequestedOrg)
	}
	// The provider's single record must be ingested under its data source.
	records := store.recordsBySource[unifiedresources.DataSource("xcp")]
	if len(records) != 1 {
		t.Fatalf("expected 1 supplemental record ingested, got %d", len(records))
	}
	if records[0].SourceID != "xcp-host-1" {
		t.Fatalf("expected supplemental source ID xcp-host-1, got %q", records[0].SourceID)
	}
}
|
|
|
|
// TestUpdateResourceStore_RecordsSupplementalChanges verifies that resource
// changes reported by a supplemental poll provider (here a VMware activity
// event) are recorded in the resource store with kind, source adapter,
// occurred-at timestamp, and metadata preserved.
func TestUpdateResourceStore_RecordsSupplementalChanges(t *testing.T) {
	store := &testSupplementalResourceStore{}
	occurredAt := time.Date(2026, 3, 30, 18, 30, 0, 0, time.UTC)
	// Provider reports a single high-confidence activity change for the
	// "default" org.
	provider := &testSupplementalPollProvider{
		testPollProvider: testPollProvider{
			providerType: InstanceType("vmware"),
			instances:    []string{"vc-1"},
			interval:     30 * time.Second,
		},
		source: unifiedresources.SourceVMware,
		changesByOrg: map[string][]unifiedresources.ResourceChange{
			"default": {
				{
					ID:            "activity-1",
					ResourceID:    "vc-1:vm:vm-201",
					ObservedAt:    occurredAt,
					OccurredAt:    &occurredAt,
					Kind:          unifiedresources.ChangeActivity,
					SourceType:    unifiedresources.SourcePlatformEvent,
					SourceAdapter: unifiedresources.AdapterVMware,
					Confidence:    unifiedresources.ConfidenceHigh,
					Reason:        "Create snapshot (success)",
					Metadata: map[string]any{
						unifiedresources.MetadataActivityType: "vmware_task",
					},
				},
			},
		},
	}

	monitor := &Monitor{
		resourceStore: store,
	}
	if err := monitor.RegisterPollProvider(provider); err != nil {
		t.Fatalf("RegisterPollProvider failed: %v", err)
	}

	monitor.updateResourceStore(models.StateSnapshot{})

	if len(store.recordedChanges) != 1 {
		t.Fatalf("expected 1 supplemental change recorded, got %d", len(store.recordedChanges))
	}
	recorded := store.recordedChanges[0]
	if recorded.Kind != unifiedresources.ChangeActivity {
		t.Fatalf("recorded change kind = %q, want %q", recorded.Kind, unifiedresources.ChangeActivity)
	}
	if recorded.SourceAdapter != unifiedresources.AdapterVMware {
		t.Fatalf("recorded change source adapter = %q, want %q", recorded.SourceAdapter, unifiedresources.AdapterVMware)
	}
	if recorded.OccurredAt == nil || !recorded.OccurredAt.Equal(occurredAt) {
		t.Fatalf("recorded change occurred_at = %v, want %v", recorded.OccurredAt, occurredAt)
	}
	if got := recorded.Metadata[unifiedresources.MetadataActivityType]; got != "vmware_task" {
		t.Fatalf("recorded activity_type = %#v, want vmware_task", got)
	}
}
// TestUpdateResourceStore_SuppressesProviderOwnedSnapshotSources verifies that
// when a poll provider declares ownership of a snapshot data source (here
// Proxmox), the corresponding snapshot slices are cleared before the store is
// populated, while non-owned slices (Hosts) and the provider's own
// supplemental records still flow through.
func TestUpdateResourceStore_SuppressesProviderOwnedSnapshotSources(t *testing.T) {
	store := &testSupplementalResourceStore{}
	provider := &testSupplementalPollProvider{
		testPollProvider: testPollProvider{
			providerType: InstanceType("xcp"),
			instances:    []string{"xcp-cluster-1"},
			interval:     30 * time.Second,
		},
		source:       unifiedresources.SourceProxmox,
		ownedSources: []unifiedresources.DataSource{unifiedresources.SourceProxmox},
		recordsByOrg: map[string][]unifiedresources.IngestRecord{
			"default": {
				{
					SourceID: "xcp-host-1",
					Resource: unifiedresources.Resource{
						Type:     unifiedresources.ResourceTypeAgent,
						Name:     "xcp-host-1",
						Status:   unifiedresources.StatusOnline,
						LastSeen: time.Now().UTC(),
					},
					Identity: unifiedresources.ResourceIdentity{Hostnames: []string{"xcp-host-1"}},
				},
			},
		},
	}

	monitor := &Monitor{
		resourceStore: store,
	}
	if err := monitor.RegisterPollProvider(provider); err != nil {
		t.Fatalf("RegisterPollProvider failed: %v", err)
	}

	// Snapshot carries one element of every proxmox-derived slice plus one
	// host; only the host should survive the suppression pass.
	monitor.updateResourceStore(models.StateSnapshot{
		Nodes:         []models.Node{{}},
		VMs:           []models.VM{{}},
		Containers:    []models.Container{{}},
		Storage:       []models.Storage{{}},
		PhysicalDisks: []models.PhysicalDisk{{}},
		CephClusters:  []models.CephCluster{{}},
		Hosts:         []models.Host{{}},
	})

	if store.snapshotCalls != 1 {
		t.Fatalf("expected PopulateFromSnapshot to be called once, got %d", store.snapshotCalls)
	}
	if len(store.lastSnapshot.Nodes) != 0 || len(store.lastSnapshot.VMs) != 0 || len(store.lastSnapshot.Containers) != 0 {
		t.Fatalf("expected proxmox compute slices to be suppressed before snapshot ingest")
	}
	if len(store.lastSnapshot.Storage) != 0 || len(store.lastSnapshot.PhysicalDisks) != 0 || len(store.lastSnapshot.CephClusters) != 0 {
		t.Fatalf("expected proxmox storage slices to be suppressed before snapshot ingest")
	}
	if len(store.lastSnapshot.Hosts) != 1 {
		t.Fatalf("expected agent slice to remain in snapshot ingest")
	}

	records := store.recordsBySource[unifiedresources.SourceProxmox]
	if len(records) != 1 {
		t.Fatalf("expected 1 provider-owned supplemental record, got %d", len(records))
	}
}
// TestUpdateResourceStore_IngestsRegisteredSupplementalProvider verifies that
// a supplemental records provider registered directly on the monitor via
// SetSupplementalRecordsProvider (i.e. without a poll provider) is queried
// for the default org and its records are ingested under its data source.
func TestUpdateResourceStore_IngestsRegisteredSupplementalProvider(t *testing.T) {
	store := &testSupplementalResourceStore{}
	provider := &testMonitorSupplementalProvider{
		recordsByOrg: map[string][]unifiedresources.IngestRecord{
			"default": {
				{
					SourceID: "tn-host-1",
					Resource: unifiedresources.Resource{
						Type:     unifiedresources.ResourceTypeAgent,
						Name:     "tn-host-1",
						Status:   unifiedresources.StatusOnline,
						LastSeen: time.Now().UTC(),
					},
					Identity: unifiedresources.ResourceIdentity{Hostnames: []string{"tn-host-1"}},
				},
			},
		},
	}

	monitor := &Monitor{
		resourceStore: store,
	}
	monitor.SetSupplementalRecordsProvider(unifiedresources.SourceTrueNAS, provider)
	// Reset the fake store so only the updateResourceStore call below is
	// observed.
	store.snapshotCalls = 0
	store.recordsBySource = nil
	monitor.updateResourceStore(models.StateSnapshot{})

	if store.snapshotCalls != 1 {
		t.Fatalf("expected PopulateFromSnapshot to be called once, got %d", store.snapshotCalls)
	}
	if provider.lastRequestedOrg != "default" {
		t.Fatalf("expected default org lookup, got %q", provider.lastRequestedOrg)
	}
	records := store.recordsBySource[unifiedresources.SourceTrueNAS]
	if len(records) != 1 {
		t.Fatalf("expected 1 supplemental record from direct provider, got %d", len(records))
	}
	if records[0].SourceID != "tn-host-1" {
		t.Fatalf("expected source ID tn-host-1, got %q", records[0].SourceID)
	}
}
// TestUpdateResourceStore_UsesAtomicStoreReplacementWhenAvailable verifies
// that when the resource store supports atomic population (snapshot plus
// supplemental records in one call), the monitor prefers that path and skips
// the legacy PopulateFromSnapshot entry point entirely.
func TestUpdateResourceStore_UsesAtomicStoreReplacementWhenAvailable(t *testing.T) {
	store := &testAtomicResourceStore{}
	provider := &testMonitorSupplementalProvider{
		recordsByOrg: map[string][]unifiedresources.IngestRecord{
			"default": {
				{
					SourceID: "tn-host-1",
					Resource: unifiedresources.Resource{
						Type:     unifiedresources.ResourceTypeAgent,
						Name:     "tn-host-1",
						Status:   unifiedresources.StatusOnline,
						LastSeen: time.Now().UTC(),
					},
					Identity: unifiedresources.ResourceIdentity{Hostnames: []string{"tn-host-1"}},
				},
			},
		},
	}

	monitor := &Monitor{
		resourceStore: store,
	}
	monitor.SetSupplementalRecordsProvider(unifiedresources.SourceTrueNAS, provider)
	// Reset counters/state so only the call below is measured.
	store.snapshotCalls = 0
	store.atomicCalls = 0
	store.lastSnapshot = models.StateSnapshot{}
	store.lastRecordsBySrc = nil
	snapshot := models.StateSnapshot{
		Hosts: []models.Host{{ID: "host-1", Hostname: "minipc", Status: "online"}},
	}

	monitor.updateResourceStore(snapshot)

	if store.atomicCalls != 1 {
		t.Fatalf("expected atomic populate to be called once, got %d", store.atomicCalls)
	}
	if store.snapshotCalls != 0 {
		t.Fatalf("expected legacy PopulateFromSnapshot to be skipped, got %d calls", store.snapshotCalls)
	}
	if len(store.lastSnapshot.Hosts) != 1 || store.lastSnapshot.Hosts[0].Hostname != "minipc" {
		t.Fatalf("expected snapshot to be passed atomically, got %#v", store.lastSnapshot)
	}
	records := store.lastRecordsBySrc[unifiedresources.SourceTrueNAS]
	if len(records) != 1 {
		t.Fatalf("expected 1 atomic supplemental record, got %d", len(records))
	}
	if records[0].SourceID != "tn-host-1" {
		t.Fatalf("expected source ID tn-host-1, got %q", records[0].SourceID)
	}
}
// TestUpdateResourceStore_SuppressesSnapshotForRegisteredSupplementalOwnership
// verifies that a directly registered supplemental provider claiming
// ownership of the Proxmox data source causes the proxmox snapshot slices to
// be cleared before ingest, while non-owned slices (Hosts) remain and the
// provider's supplemental records are still ingested.
func TestUpdateResourceStore_SuppressesSnapshotForRegisteredSupplementalOwnership(t *testing.T) {
	store := &testSupplementalResourceStore{}
	provider := &testMonitorSupplementalProvider{
		ownedSources: []unifiedresources.DataSource{unifiedresources.SourceProxmox},
		recordsByOrg: map[string][]unifiedresources.IngestRecord{
			"default": {
				{
					SourceID: "tn-host-1",
					Resource: unifiedresources.Resource{
						Type:     unifiedresources.ResourceTypeAgent,
						Name:     "tn-host-1",
						Status:   unifiedresources.StatusOnline,
						LastSeen: time.Now().UTC(),
					},
					Identity: unifiedresources.ResourceIdentity{Hostnames: []string{"tn-host-1"}},
				},
			},
		},
	}

	monitor := &Monitor{
		resourceStore: store,
	}
	monitor.SetSupplementalRecordsProvider(unifiedresources.SourceTrueNAS, provider)
	// Reset the fake store so only the call below is observed.
	store.snapshotCalls = 0
	store.recordsBySource = nil
	store.lastSnapshot = models.StateSnapshot{}
	monitor.updateResourceStore(models.StateSnapshot{
		Nodes:      []models.Node{{}},
		VMs:        []models.VM{{}},
		Containers: []models.Container{{}},
		Hosts:      []models.Host{{}},
	})

	if len(store.lastSnapshot.Nodes) != 0 || len(store.lastSnapshot.VMs) != 0 || len(store.lastSnapshot.Containers) != 0 {
		t.Fatalf("expected proxmox slices to be suppressed for direct provider ownership")
	}
	if len(store.lastSnapshot.Hosts) != 1 {
		t.Fatalf("expected non-owned host slice to remain")
	}
	records := store.recordsBySource[unifiedresources.SourceTrueNAS]
	if len(records) != 1 {
		t.Fatalf("expected 1 supplemental record from direct provider, got %d", len(records))
	}
}
func TestSchedulerHealth_UsesProviderInstanceDescriptions(t *testing.T) {
|
|
const customType InstanceType = "xcp"
|
|
monitor := &Monitor{
|
|
config: &config.Config{},
|
|
instanceInfoCache: make(map[string]*instanceInfo),
|
|
}
|
|
|
|
if err := monitor.RegisterPollProvider(testPollProvider{
|
|
providerType: customType,
|
|
instances: []string{"xcp-a"},
|
|
describeInstances: []PollProviderInstanceInfo{
|
|
{
|
|
Name: "xcp-a",
|
|
DisplayName: "XCP Cluster A",
|
|
Connection: "https://xcp-a.example",
|
|
},
|
|
},
|
|
interval: 30 * time.Second,
|
|
}); err != nil {
|
|
t.Fatalf("RegisterPollProvider failed: %v", err)
|
|
}
|
|
|
|
resp := monitor.SchedulerHealth()
|
|
|
|
key := schedulerKey(customType, "xcp-a")
|
|
for _, inst := range resp.Instances {
|
|
if inst.Key != key {
|
|
continue
|
|
}
|
|
if inst.DisplayName != "XCP Cluster A" {
|
|
t.Fatalf("expected display name %q, got %q", "XCP Cluster A", inst.DisplayName)
|
|
}
|
|
if inst.Connection != "https://xcp-a.example" {
|
|
t.Fatalf("expected connection %q, got %q", "https://xcp-a.example", inst.Connection)
|
|
}
|
|
return
|
|
}
|
|
|
|
t.Fatalf("expected instance %q in scheduler health", key)
|
|
}
func TestGetConnectionStatuses_CustomProviderStatuses(t *testing.T) {
|
|
const customType InstanceType = "xcp"
|
|
monitor := &Monitor{
|
|
state: models.NewState(),
|
|
}
|
|
if err := monitor.RegisterPollProvider(testPollProvider{
|
|
providerType: customType,
|
|
instances: []string{"xcp-a"},
|
|
connectionStatus: map[string]bool{
|
|
"xcp-xcp-a": true,
|
|
},
|
|
}); err != nil {
|
|
t.Fatalf("RegisterPollProvider failed: %v", err)
|
|
}
|
|
|
|
statuses := monitor.GetConnectionStatuses()
|
|
if connected, ok := statuses["xcp-xcp-a"]; !ok || !connected {
|
|
t.Fatalf("expected xcp-xcp-a to be connected, got exists=%v value=%v", ok, connected)
|
|
}
|
|
}
func TestGetConnectionStatuses_CustomProviderFallbackToState(t *testing.T) {
|
|
const customType InstanceType = "xcp"
|
|
monitor := &Monitor{
|
|
state: models.NewState(),
|
|
}
|
|
monitor.state.SetConnectionHealth("xcp-xcp-a", true)
|
|
|
|
if err := monitor.RegisterPollProvider(pollProviderAdapter{
|
|
instanceType: customType,
|
|
listInstances: func(*Monitor) []string {
|
|
return []string{"xcp-a"}
|
|
},
|
|
baseInterval: func(*Monitor) time.Duration { return 30 * time.Second },
|
|
buildPollTask: func(*Monitor, string) (PollTask, error) {
|
|
return PollTask{}, nil
|
|
},
|
|
}); err != nil {
|
|
t.Fatalf("RegisterPollProvider failed: %v", err)
|
|
}
|
|
|
|
statuses := monitor.GetConnectionStatuses()
|
|
if connected, ok := statuses["xcp-xcp-a"]; !ok || !connected {
|
|
t.Fatalf("expected xcp-xcp-a to be connected via fallback, got exists=%v value=%v", ok, connected)
|
|
}
|
|
}
func TestGetConnectionStatuses_BuiltInPMGSupport(t *testing.T) {
|
|
monitor := &Monitor{
|
|
config: &config.Config{
|
|
PMGInstances: []config.PMGInstance{
|
|
{Name: "pmg-1"},
|
|
{Name: "pmg-2"},
|
|
},
|
|
},
|
|
state: models.NewState(),
|
|
pmgClients: map[string]*pmg.Client{"pmg-1": {}},
|
|
}
|
|
monitor.state.SetConnectionHealth("pmg-pmg-1", true)
|
|
|
|
statuses := monitor.GetConnectionStatuses()
|
|
if connected, ok := statuses["pmg-pmg-1"]; !ok || !connected {
|
|
t.Fatalf("expected pmg-pmg-1 connected, got exists=%v value=%v", ok, connected)
|
|
}
|
|
if connected, ok := statuses["pmg-pmg-2"]; !ok || connected {
|
|
t.Fatalf("expected pmg-pmg-2 disconnected, got exists=%v value=%v", ok, connected)
|
|
}
|
|
}
func TestSetProviderConnectionHealth_UsesProviderConnectionKey(t *testing.T) {
|
|
const customType InstanceType = "xcp"
|
|
const instanceName = "xcp-a"
|
|
const providerKey = "provider/xcp-a"
|
|
|
|
monitor := &Monitor{
|
|
state: models.NewState(),
|
|
}
|
|
if err := monitor.RegisterPollProvider(testPollProvider{
|
|
providerType: customType,
|
|
instances: []string{instanceName},
|
|
connectionKey: providerKey,
|
|
}); err != nil {
|
|
t.Fatalf("RegisterPollProvider failed: %v", err)
|
|
}
|
|
|
|
monitor.setProviderConnectionHealth(customType, instanceName, true)
|
|
|
|
if !monitor.state.ConnectionHealth[providerKey] {
|
|
t.Fatalf("expected provider key %q to be marked healthy", providerKey)
|
|
}
|
|
if _, exists := monitor.state.ConnectionHealth["xcp-"+instanceName]; exists {
|
|
t.Fatalf("did not expect fallback key %q when provider key override is set", "xcp-"+instanceName)
|
|
}
|
|
}