// Pulse/internal/monitoring/truenas_poller_test.go

package monitoring

import (
"bytes"
"context"
"errors"
"fmt"
"net"
"net/http"
"net/http/httptest"
"net/url"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/config"
"github.com/rcourtman/pulse-go-rewrite/internal/truenas"
"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
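
// TestTrueNASPollerPollsConfiguredConnections verifies that a started poller
// polls a persisted TrueNAS connection and ingests its host resources.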
func TestTrueNASPollerPollsConfiguredConnections(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
mock := newTrueNASMockServer(t, "nas-one")
t.Cleanup(mock.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "conn-1", mock.URL(), true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return mock.RequestCount() >= 5 && hasTrueNASHostForOrg(poller, "default", "nas-one")
}, "expected configured TrueNAS connection to poll and ingest host resources")
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "nas-one") {
t.Fatal("expected TrueNAS resources to be ingested")
}
}
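
// TestTrueNASPollerFeatureFlagGate verifies that Start() is a no-op and that
// no requests are issued while the TrueNAS feature flag is disabled.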
func TestTrueNASPollerFeatureFlagGate(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(false)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
mock := newTrueNASMockServer(t, "nas-feature-flag-off")
t.Cleanup(mock.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "feature-flag-off-conn", mock.URL(), true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
initialStopped := poller.stopped
poller.Start(context.Background())
if poller.cancel != nil {
t.Fatal("expected Start() to be a no-op with feature flag disabled")
}
if poller.stopped != initialStopped {
t.Fatal("expected stopped channel to remain unchanged when Start() is gated")
}
select {
case <-poller.stopped:
default:
t.Fatal("expected stopped channel to remain pre-closed when Start() is gated")
}
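// The condition stays false until noPollDeadline passes, so the wait
// effectively asserts the mock saw zero requests during the quiet window.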
noPollDeadline := time.Now().Add(200 * time.Millisecond)
waitForCondition(t, 500*time.Millisecond, func() bool {
return time.Now().After(noPollDeadline) && mock.RequestCount() == 0
}, "expected no TrueNAS polling requests when feature flag is disabled")
poller.Stop()
}
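
// TestTrueNASPollerEnableDisableCycle verifies that a poller which ran while
// the feature flag was enabled cannot be restarted after the flag is disabled,
// and that no further requests or records appear.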
func TestTrueNASPollerEnableDisableCycle(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
mock := newTrueNASMockServer(t, "nas-enable-disable")
t.Cleanup(mock.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "enable-disable-conn", mock.URL(), true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && pollerHasProvider(poller, connection.ID) && mock.RequestCount() >= 5
}, "expected enabled poller to start and poll configured TrueNAS connection")
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "nas-enable-disable") {
t.Fatal("expected enabled poller to ingest TrueNAS resources")
}
requestCountAfterStop := mock.RequestCount()
recordCountAfterStop := len(poller.GetCurrentRecordsForOrg("default"))
truenas.SetFeatureEnabled(false)
poller.Start(context.Background())
if poller.cancel != nil {
t.Fatal("expected Start() to remain a no-op after disable without restarting process")
}
noPollDeadline := time.Now().Add(200 * time.Millisecond)
waitForCondition(t, 500*time.Millisecond, func() bool {
return time.Now().After(noPollDeadline) && mock.RequestCount() == requestCountAfterStop
}, "expected no additional polling requests after disable and restart attempt")
if got := len(poller.GetCurrentRecordsForOrg("default")); got != recordCountAfterStop {
t.Fatalf("expected no new records after disable restart attempt, got before=%d after=%d", recordCountAfterStop, got)
}
}
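
// TestTrueNASPollerKillSwitchAllConnectionsRemoved verifies that clearing the
// persisted connection list drains all providers and halts polling.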
func TestTrueNASPollerKillSwitchAllConnectionsRemoved(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
mock := newTrueNASMockServer(t, "nas-kill-switch")
t.Cleanup(mock.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "kill-switch-conn", mock.URL(), true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && pollerHasProvider(poller, connection.ID) && mock.RequestCount() >= 5
}, "expected initial TrueNAS connection to be active and polling")
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{}); err != nil {
t.Fatalf("SaveTrueNASConfig() clear error = %v", err)
}
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 0
}, "expected all TrueNAS providers to be drained after removing all connections")
if pollerHasProvider(poller, connection.ID) {
t.Fatalf("expected provider %q to be removed after kill-switch config update", connection.ID)
}
requestCountAfterDrain := mock.RequestCount()
noPollDeadline := time.Now().Add(200 * time.Millisecond)
waitForCondition(t, 500*time.Millisecond, func() bool {
return time.Now().After(noPollDeadline) && mock.RequestCount() == requestCountAfterDrain
}, "expected no further polling after all TrueNAS connections are removed")
poller.Stop()
}
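
// TestTrueNASPollerRecordsMetrics verifies that the poller survives an initial
// run of HTTP 500 responses and still ingests resources once requests succeed,
// exercising both the error and success metrics paths.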
func TestTrueNASPollerRecordsMetrics(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
var requestCount atomic.Int64
var errorCount atomic.Int64
var successCount atomic.Int64
var remainingFailures atomic.Int64
remainingFailures.Store(3)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
requestCount.Add(1)
w.Header().Set("Content-Type", "application/json")
if remainingFailures.Load() > 0 {
remainingFailures.Add(-1)
errorCount.Add(1)
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte(`{"error":"simulated failure"}`))
return
}
successCount.Add(1)
switch r.URL.Path {
case "/api/v2.0/system/info":
_, _ = w.Write([]byte(`{"hostname":"metrics-host","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-001"}`))
case "/api/v2.0/pool":
_, _ = w.Write([]byte(`[{"id":1,"name":"tank","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
case "/api/v2.0/pool/dataset":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/disk":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/alert/list":
_, _ = w.Write([]byte(`[]`))
default:
http.NotFound(w, r)
}
}))
t.Cleanup(server.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "metrics-conn", server.URL, true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 5*time.Second, func() bool {
return successCount.Load() > 0 && hasTrueNASHostForOrg(poller, "default", "metrics-host")
}, "expected TrueNAS resources to appear after initial failures")
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "metrics-host") {
t.Fatal("expected TrueNAS resources to appear after initial failures")
}
if errorCount.Load() == 0 {
t.Fatal("expected at least one failed request to exercise metrics error path")
}
if successCount.Load() == 0 {
t.Fatal("expected successful requests to exercise metrics success path")
}
if requestCount.Load() < errorCount.Load()+successCount.Load() {
t.Fatalf("unexpected request accounting: total=%d errors=%d successes=%d", requestCount.Load(), errorCount.Load(), successCount.Load())
}
}
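
// TestTrueNASPollerConnectionSummariesExposeObservedCounts verifies that a
// successful poll populates the per-connection summary with the poll interval
// and the observed host, pool, dataset, and disk counts.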
func TestTrueNASPollerConnectionSummariesExposeObservedCounts(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
mock := newTrueNASMockServer(t, "summary-host")
t.Cleanup(mock.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "summary-conn", mock.URL(), true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
summaries := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})
summary, ok := summaries[connection.ID]
return ok && summary.Poll != nil && summary.Poll.LastSuccessAt != nil && summary.Observed != nil
}, "expected connection summary to include successful poll and observed counts")
summary := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})[connection.ID]
if summary.Poll == nil || summary.Poll.IntervalSeconds != 60 {
t.Fatalf("expected poll interval summary 60 seconds, got %+v", summary.Poll)
}
if summary.Observed == nil {
t.Fatal("expected observed summary to be present")
}
if summary.Observed.Host != "summary-host" || summary.Observed.ResourceID != "summary-host" {
t.Fatalf("unexpected observed host identity: %+v", summary.Observed)
}
if summary.Observed.Systems != 1 || summary.Observed.StoragePools != 1 || summary.Observed.Datasets != 1 || summary.Observed.Disks != 1 {
t.Fatalf("unexpected observed counts: %+v", summary.Observed)
}
}
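
// TestTrueNASPollerConnectionSummariesCaptureFailures verifies that repeated
// 401 responses surface in the connection summary as a categorized auth error
// with a non-zero consecutive-failure count.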
func TestTrueNASPollerConnectionSummariesCaptureFailures(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusUnauthorized)
_, _ = w.Write([]byte(`{"error":"unauthorized"}`))
}))
t.Cleanup(server.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "summary-fail", server.URL, true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
summaries := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})
summary, ok := summaries[connection.ID]
return ok && summary.Poll != nil && summary.Poll.LastError != nil && summary.Poll.ConsecutiveFailures > 0
}, "expected connection summary to capture poll failure state")
summary := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})[connection.ID]
if summary.Poll == nil || summary.Poll.LastError == nil {
t.Fatalf("expected poll failure summary, got %+v", summary.Poll)
}
if summary.Poll.LastError.Category != "auth" {
t.Fatalf("expected auth error category, got %+v", summary.Poll.LastError)
}
}
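
// TestTrueNASPollerManualConnectionTestsUpdateSummariesWithoutClearingObservedCounts
// verifies that RecordConnectionTestSuccess clears a prior poll error while
// preserving the previously observed resource counts.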
func TestTrueNASPollerManualConnectionTestsUpdateSummariesWithoutClearingObservedCounts(t *testing.T) {
poller := NewTrueNASPoller(nil, time.Minute, nil)
connection := config.TrueNASInstance{
ID: "manual-test-conn",
Host: "manual-test.local",
APIKey: "secret",
UseHTTPS: true,
Enabled: true,
PollIntervalSecs: 120,
}
connection.ApplyDefaults()
snapshot := &truenas.FixtureSnapshot{
System: truenas.SystemInfo{
Hostname: "manual-test",
},
Pools: []truenas.Pool{{Name: "tank"}},
}
firstSuccess := time.Date(2026, time.March, 30, 10, 0, 0, 0, time.UTC)
failureAt := firstSuccess.Add(2 * time.Minute)
manualSuccessAt := failureAt.Add(2 * time.Minute)
poller.mu.Lock()
poller.recordConnectionSuccessLocked("default", connection.ID, connection, firstSuccess, snapshot)
poller.recordConnectionFailureLocked("default", connection.ID, connection, errors.New("manual auth failed"), failureAt)
poller.mu.Unlock()
poller.RecordConnectionTestSuccess("default", connection.ID, connection, manualSuccessAt)
summary := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})[connection.ID]
if summary.Poll == nil || summary.Poll.LastSuccessAt == nil {
t.Fatalf("expected manual success to update poll summary, got %+v", summary.Poll)
}
if summary.Poll.LastError != nil {
t.Fatalf("expected manual success to clear previous error, got %+v", summary.Poll.LastError)
}
if summary.Observed == nil || summary.Observed.Host != "manual-test" || summary.Observed.StoragePools != 1 {
t.Fatalf("expected observed summary to be preserved after manual success, got %+v", summary.Observed)
}
}
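
// TestTrueNASPollerHonorsConfiguredPollInterval verifies that a connection's
// PollIntervalSecs overrides the poller default: no early re-poll occurs, and
// the next poll arrives well before the 60s default would fire.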
func TestTrueNASPollerHonorsConfiguredPollInterval(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
mock := newTrueNASMockServer(t, "interval-host")
t.Cleanup(mock.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "interval-conn", mock.URL(), true)
connection.PollIntervalSecs = 1
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 0, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return mock.RequestCount() >= 5
}, "expected initial immediate poll for configured TrueNAS connection")
requestCountAfterFirstPoll := mock.RequestCount()
time.Sleep(400 * time.Millisecond)
if got := mock.RequestCount(); got != requestCountAfterFirstPoll {
t.Fatalf("expected configured 1s poll interval to avoid an early repoll, got before=%d after=%d", requestCountAfterFirstPoll, got)
}
waitForCondition(t, 2*time.Second, func() bool {
return mock.RequestCount() > requestCountAfterFirstPoll
}, "expected configured 1s poll interval to trigger the next poll without waiting for the 60s default")
}
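
// TestTrueNASPollerPhysicalDiskTemperatureHistoryUsesTenantScopedProvider
// verifies that disk temperature history is served from the provider bound to
// the requesting org and keyed by the canonical metric resource ID.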
func TestTrueNASPollerPhysicalDiskTemperatureHistoryUsesTenantScopedProvider(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
fixtures := truenas.DefaultFixtures()
now := time.Date(2026, 3, 29, 20, 0, 0, 0, time.UTC)
fetcher := &controllableTrueNASHistoryFetcher{
snapshot: &fixtures,
history: map[string][]truenas.TimeSeriesPoint{
"sda": {
{Timestamp: now.Add(-2 * time.Hour), Value: 30},
{Timestamp: now.Add(-1 * time.Hour), Value: 32},
{Timestamp: now, Value: 34},
},
},
}
provider := truenas.NewLiveProvider(fetcher)
if err := provider.Refresh(context.Background()); err != nil {
t.Fatalf("Refresh() error = %v", err)
}
poller := NewTrueNASPoller(nil, time.Minute, nil)
poller.providersByOrg["default"] = map[string]*truenas.Provider{
"conn-1": provider,
}
history := poller.PhysicalDiskTemperatureHistory(nil, "default", 4*time.Hour)
points, ok := history["ZL0A1234"]
if !ok {
t.Fatalf("expected canonical metric resource id ZL0A1234, got %#v", history)
}
if len(points) != 3 || points[len(points)-1].Value != 34 {
t.Fatalf("unexpected tenant-scoped disk history: %+v", points)
}
}
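
// TestTrueNASPollerGuestMetricHistoryUsesTenantScopedProvider verifies that
// CPU and memory history is served from the tenant-scoped provider under the
// canonical agent metric ID.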
func TestTrueNASPollerGuestMetricHistoryUsesTenantScopedProvider(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
fixtures := truenas.DefaultFixtures()
now := time.Date(2026, 3, 29, 20, 0, 0, 0, time.UTC)
fetcher := &controllableTrueNASHistoryFetcher{
snapshot: &fixtures,
systemHistory: &truenas.SystemMetricHistory{
CPUPercent: []truenas.TimeSeriesPoint{
{Timestamp: now.Add(-2 * time.Hour), Value: 20},
{Timestamp: now, Value: 34},
},
MemoryPercent: []truenas.TimeSeriesPoint{
{Timestamp: now.Add(-2 * time.Hour), Value: 45},
{Timestamp: now, Value: 62},
},
},
}
provider := truenas.NewLiveProvider(fetcher)
if err := provider.Refresh(context.Background()); err != nil {
t.Fatalf("Refresh() error = %v", err)
}
poller := NewTrueNASPoller(nil, time.Minute, nil)
poller.providersByOrg["default"] = map[string]*truenas.Provider{
"conn-1": provider,
}
history := poller.GuestMetricHistory(nil, "default", "agent", 4*time.Hour)
metricMap, ok := history["truenas-main"]
if !ok {
t.Fatalf("expected canonical agent metric id truenas-main, got %#v", history)
}
if len(metricMap["cpu"]) != 2 || metricMap["cpu"][1].Value != 34 {
t.Fatalf("unexpected cpu history: %+v", metricMap["cpu"])
}
if len(metricMap["memory"]) != 2 || metricMap["memory"][1].Value != 62 {
t.Fatalf("unexpected memory history: %+v", metricMap["memory"])
}
}
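
// controllableTrueNASHistoryFetcher is a test fetcher that serves a fixed
// fixture snapshot plus canned disk-temperature and system-metric history,
// returning defensive copies from every method.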
type controllableTrueNASHistoryFetcher struct {
snapshot *truenas.FixtureSnapshot
history map[string][]truenas.TimeSeriesPoint
systemHistory *truenas.SystemMetricHistory
}
func (s *controllableTrueNASHistoryFetcher) Fetch(context.Context) (*truenas.FixtureSnapshot, error) {
if s == nil || s.snapshot == nil {
return nil, nil
}
copied := *s.snapshot
copied.Disks = append([]truenas.Disk(nil), s.snapshot.Disks...)
copied.Pools = append([]truenas.Pool(nil), s.snapshot.Pools...)
copied.Datasets = append([]truenas.Dataset(nil), s.snapshot.Datasets...)
copied.Alerts = append([]truenas.Alert(nil), s.snapshot.Alerts...)
copied.Apps = append([]truenas.App(nil), s.snapshot.Apps...)
copied.ZFSSnapshots = append([]truenas.ZFSSnapshot(nil), s.snapshot.ZFSSnapshots...)
copied.ReplicationTasks = append([]truenas.ReplicationTask(nil), s.snapshot.ReplicationTasks...)
return &copied, nil
}
func (s *controllableTrueNASHistoryFetcher) DiskTemperatureHistory(_ context.Context, identifiers []string, _ time.Duration) (map[string][]truenas.TimeSeriesPoint, error) {
result := make(map[string][]truenas.TimeSeriesPoint)
for _, identifier := range identifiers {
points, ok := s.history[identifier]
if !ok || len(points) == 0 {
continue
}
copied := make([]truenas.TimeSeriesPoint, len(points))
copy(copied, points)
result[identifier] = copied
}
if len(result) == 0 {
return nil, nil
}
return result, nil
}
func (s *controllableTrueNASHistoryFetcher) SystemMetricHistory(context.Context, time.Duration) (*truenas.SystemMetricHistory, error) {
if s == nil || s.systemHistory == nil {
return nil, nil
}
copied := *s.systemHistory
copied.CPUPercent = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.CPUPercent...)
copied.MemoryPercent = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.MemoryPercent...)
copied.MemoryUsedBytes = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.MemoryUsedBytes...)
copied.MemoryAvailableBytes = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.MemoryAvailableBytes...)
copied.MemoryTotalBytes = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.MemoryTotalBytes...)
copied.NetInRate = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.NetInRate...)
copied.NetOutRate = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.NetOutRate...)
copied.DiskReadRate = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.DiskReadRate...)
copied.DiskWriteRate = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.DiskWriteRate...)
return &copied, nil
}
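
// pollerControlFetcher is a test fetcher that records app start/stop and log
// read calls and mutates its snapshot to reflect the requested app state.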
type pollerControlFetcher struct {
snapshot *truenas.FixtureSnapshot
startCalls []string
stopCalls []string
logReads []pollerLogReadCall
}
type pollerLogReadCall struct {
appName string
containerID string
tailLines int
}
func (f *pollerControlFetcher) Fetch(context.Context) (*truenas.FixtureSnapshot, error) {
if f == nil {
return nil, nil
}
return copyTrueNASSnapshot(f.snapshot), nil
}
func (f *pollerControlFetcher) StartApp(_ context.Context, appID string) error {
f.startCalls = append(f.startCalls, appID)
for i := range f.snapshot.Apps {
if f.snapshot.Apps[i].ID == appID {
f.snapshot.Apps[i].State = "RUNNING"
if len(f.snapshot.Apps[i].Containers) > 0 {
f.snapshot.Apps[i].Containers[0].State = "running"
}
}
}
return nil
}
func (f *pollerControlFetcher) StopApp(_ context.Context, appID string) error {
f.stopCalls = append(f.stopCalls, appID)
for i := range f.snapshot.Apps {
if f.snapshot.Apps[i].ID == appID {
f.snapshot.Apps[i].State = "STOPPED"
if len(f.snapshot.Apps[i].Containers) > 0 {
f.snapshot.Apps[i].Containers[0].State = "stopped"
}
}
}
return nil
}
func (f *pollerControlFetcher) ReadAppLogs(_ context.Context, appName, containerID string, tailLines int) ([]truenas.AppLogLine, error) {
f.logReads = append(f.logReads, pollerLogReadCall{
appName: appName,
containerID: containerID,
tailLines: tailLines,
})
return []truenas.AppLogLine{
{Timestamp: "2026-03-29T18:00:00Z", Data: "ready"},
}, nil
}
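
// TestTrueNASPollerControlAppRefreshesCachedRecords verifies that ControlApp
// forwards the start request to the fetcher and refreshes the org's cached
// records to reflect the app's new state.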
func TestTrueNASPollerControlAppRefreshesCachedRecords(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
fixtures := truenas.DefaultFixtures()
for i := range fixtures.Apps {
if fixtures.Apps[i].ID == "nextcloud" {
fixtures.Apps[i].State = "STOPPED"
if len(fixtures.Apps[i].Containers) > 0 {
fixtures.Apps[i].Containers[0].State = "stopped"
}
}
}
fetcher := &pollerControlFetcher{snapshot: &fixtures}
provider := truenas.NewLiveProvider(fetcher)
if err := provider.Refresh(context.Background()); err != nil {
t.Fatalf("Refresh() error = %v", err)
}
poller := NewTrueNASPoller(nil, 0, nil)
poller.providersByOrg["default"] = map[string]*truenas.Provider{"conn-1": provider}
poller.cachedRecordsByOrg["default"] = map[string][]unifiedresources.IngestRecord{"conn-1": provider.Records()}
app, err := poller.ControlApp(context.Background(), "default", "truenas-main", "nextcloud", "start")
if err != nil {
t.Fatalf("ControlApp() error = %v", err)
}
if app == nil || app.State != "RUNNING" {
t.Fatalf("expected RUNNING app after control, got %+v", app)
}
if len(fetcher.startCalls) != 1 || fetcher.startCalls[0] != "nextcloud" {
t.Fatalf("expected start call for nextcloud, got %+v", fetcher.startCalls)
}
if got := len(poller.cachedRecordsByOrg["default"]["conn-1"]); got == 0 {
t.Fatal("expected refreshed cached records after app control")
}
}
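
// TestTrueNASPollerReadAppLogsUsesTenantScopedProvider verifies that log reads
// resolve the canonical primary container through the org's provider and pass
// the requested tail length to the fetcher.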
func TestTrueNASPollerReadAppLogsUsesTenantScopedProvider(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
fixtures := truenas.DefaultFixtures()
fetcher := &pollerControlFetcher{snapshot: &fixtures}
provider := truenas.NewLiveProvider(fetcher)
if err := provider.Refresh(context.Background()); err != nil {
t.Fatalf("Refresh() error = %v", err)
}
poller := NewTrueNASPoller(nil, 0, nil)
poller.providersByOrg["default"] = map[string]*truenas.Provider{"conn-1": provider}
poller.cachedRecordsByOrg["default"] = map[string][]unifiedresources.IngestRecord{"conn-1": provider.Records()}
result, err := poller.ReadAppLogs(context.Background(), "default", "truenas-main", "nextcloud", "", 20)
if err != nil {
t.Fatalf("ReadAppLogs() error = %v", err)
}
if result == nil || result.App.Name != "Nextcloud" {
t.Fatalf("expected Nextcloud log result, got %+v", result)
}
if result.Container.ID != "nextcloud-web-1" {
t.Fatalf("expected canonical primary container, got %+v", result.Container)
}
if len(fetcher.logReads) != 1 {
t.Fatalf("expected one log read, got %+v", fetcher.logReads)
}
if call := fetcher.logReads[0]; call.appName != "nextcloud" || call.containerID != "nextcloud-web-1" || call.tailLines != 20 {
t.Fatalf("unexpected log read call: %+v", call)
}
}
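
// TestTrueNASPollerGetAppConfigUsesTenantScopedProvider verifies that app
// config lookups resolve through the org's provider and return the canonical
// app runtime shape for the expected host.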
func TestTrueNASPollerGetAppConfigUsesTenantScopedProvider(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
fixtures := truenas.DefaultFixtures()
fetcher := &pollerControlFetcher{snapshot: &fixtures}
provider := truenas.NewLiveProvider(fetcher)
if err := provider.Refresh(context.Background()); err != nil {
t.Fatalf("Refresh() error = %v", err)
}
poller := NewTrueNASPoller(nil, 0, nil)
poller.providersByOrg["default"] = map[string]*truenas.Provider{"conn-1": provider}
poller.cachedRecordsByOrg["default"] = map[string][]unifiedresources.IngestRecord{"conn-1": provider.Records()}
result, err := poller.GetAppConfig(context.Background(), "default", "truenas-main", "nextcloud")
if err != nil {
t.Fatalf("GetAppConfig() error = %v", err)
}
if result == nil || result.App.Name != "Nextcloud" {
t.Fatalf("expected Nextcloud config result, got %+v", result)
}
if result.Host != "truenas-main" {
t.Fatalf("expected config host truenas-main, got %+v", result)
}
if len(result.App.Containers) != 2 {
t.Fatalf("expected canonical app runtime shape, got %+v", result.App.Containers)
}
}
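
// TestTrueNASPollerHandlesConnectionAddRemove verifies that providers are
// created for newly persisted connections and pruned, along with their cached
// resources, when connections are removed.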
func TestTrueNASPollerHandlesConnectionAddRemove(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
first := newTrueNASMockServer(t, "nas-one")
second := newTrueNASMockServer(t, "nas-two")
t.Cleanup(first.Close)
t.Cleanup(second.Close)
mtp, persistence := newTestTenantPersistence(t)
connOne := trueNASInstanceForServer(t, "conn-1", first.URL(), true)
connTwo := trueNASInstanceForServer(t, "conn-2", second.URL(), true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connOne}); err != nil {
t.Fatalf("SaveTrueNASConfig() initial error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && first.RequestCount() >= 5
}, "expected first connection provider and successful poll cycle")
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connOne, connTwo}); err != nil {
t.Fatalf("SaveTrueNASConfig() add error = %v", err)
}
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 2 && second.RequestCount() >= 5
}, "expected second connection to be discovered and polled")
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connTwo}); err != nil {
t.Fatalf("SaveTrueNASConfig() remove error = %v", err)
}
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && !pollerHasProvider(poller, "conn-1")
}, "expected removed connection provider to be pruned")
poller.Stop()
if hasTrueNASHostForOrg(poller, "default", "nas-one") {
t.Fatal("expected first host resources to be removed after pruning provider")
}
if !hasTrueNASHostForOrg(poller, "default", "nas-two") {
t.Fatal("expected second host resources to be ingested")
}
}
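
// copyTrueNASSnapshot deep-copies a fixture snapshot, including per-app slices
// and stats, so callers receive data independent of the backing snapshot.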
func copyTrueNASSnapshot(snapshot *truenas.FixtureSnapshot) *truenas.FixtureSnapshot {
if snapshot == nil {
return nil
}
cloned := *snapshot
cloned.Pools = append([]truenas.Pool(nil), snapshot.Pools...)
cloned.Datasets = append([]truenas.Dataset(nil), snapshot.Datasets...)
cloned.Disks = append([]truenas.Disk(nil), snapshot.Disks...)
cloned.Alerts = append([]truenas.Alert(nil), snapshot.Alerts...)
cloned.ZFSSnapshots = append([]truenas.ZFSSnapshot(nil), snapshot.ZFSSnapshots...)
cloned.ReplicationTasks = append([]truenas.ReplicationTask(nil), snapshot.ReplicationTasks...)
if snapshot.System.TemperatureCelsius != nil {
cloned.System.TemperatureCelsius = make(map[string]float64, len(snapshot.System.TemperatureCelsius))
for key, value := range snapshot.System.TemperatureCelsius {
cloned.System.TemperatureCelsius[key] = value
}
}
if len(snapshot.Apps) > 0 {
cloned.Apps = make([]truenas.App, len(snapshot.Apps))
for i, app := range snapshot.Apps {
appCopy := app
appCopy.UsedHostIPs = append([]string(nil), app.UsedHostIPs...)
appCopy.UsedPorts = append([]truenas.AppPort(nil), app.UsedPorts...)
appCopy.Volumes = append([]truenas.AppVolume(nil), app.Volumes...)
appCopy.Images = append([]string(nil), app.Images...)
appCopy.Networks = append([]truenas.AppNetwork(nil), app.Networks...)
appCopy.Containers = append([]truenas.AppContainer(nil), app.Containers...)
if app.Stats != nil {
statsCopy := *app.Stats
statsCopy.Interfaces = append([]truenas.AppInterfaceStats(nil), app.Stats.Interfaces...)
appCopy.Stats = &statsCopy
}
cloned.Apps[i] = appCopy
}
}
return &cloned
}
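
// TestTrueNASPollerAPITimeout verifies that the poller keeps retrying while a
// short client timeout trips on a slow /system/info endpoint, then recovers
// and ingests resources once the delay is removed.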
func TestTrueNASPollerAPITimeout(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
var requestCount atomic.Int64
var injectDelay atomic.Bool
injectDelay.Store(true)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
requestCount.Add(1)
w.Header().Set("Content-Type", "application/json")
if r.URL.Path == "/api/v2.0/system/info" && injectDelay.Load() {
time.Sleep(200 * time.Millisecond)
}
switch r.URL.Path {
case "/api/v2.0/system/info":
_, _ = w.Write([]byte(`{"hostname":"timeout-host","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-timeout-host"}`))
case "/api/v2.0/pool":
_, _ = w.Write([]byte(`[{"id":1,"name":"timeout-pool","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
case "/api/v2.0/pool/dataset":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/disk":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/alert/list":
_, _ = w.Write([]byte(`[]`))
default:
http.NotFound(w, r)
}
}))
t.Cleanup(server.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "timeout-conn", server.URL, true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
injectTrueNASProviderTimeout(t, poller, connection, 75*time.Millisecond)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return requestCount.Load() >= 3
}, "expected poller to continue retrying while API requests time out")
injectDelay.Store(false)
recoveryStart := requestCount.Load()
waitForCondition(t, 3*time.Second, func() bool {
return requestCount.Load() >= recoveryStart+5
}, "expected at least one successful poll cycle after timeout clears")
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "timeout-host") {
t.Fatal("expected poller to recover and ingest TrueNAS resources after timeout clears")
}
}
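
// TestTrueNASPollerAuthFailure verifies that the poller keeps running and
// retrying through persistent 401 responses without ingesting any resources.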
func TestTrueNASPollerAuthFailure(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
var requestCount atomic.Int64
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
requestCount.Add(1)
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusUnauthorized)
_, _ = w.Write([]byte(`{"error":"unauthorized"}`))
}))
t.Cleanup(server.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "auth-failure-conn", server.URL, true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return requestCount.Load() >= 2
}, "expected at least two poll attempts with auth failures")
before := requestCount.Load()
waitForCondition(t, 2*time.Second, func() bool {
return requestCount.Load() > before
}, "expected poller to keep attempting after repeated auth failures")
select {
case <-poller.stopped:
t.Fatal("expected poller to keep running after auth failures")
default:
}
poller.Stop()
if hasTrueNASHostForOrg(poller, "default", "auth-failure-host") {
t.Fatal("expected no resources to be ingested when every poll fails auth")
}
}
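
// TestTrueNASPollerStaleDataRecovery verifies that the poller keeps polling
// through a simulated outage window and ingests the refreshed host data once
// the endpoint recovers.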
func TestTrueNASPollerStaleDataRecovery(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
const (
initialSuccessPolls = int64(2)
failurePolls = int64(3)
)
var pollAttempts atomic.Int64
var initialSuccesses atomic.Int64
var recoverySuccesses atomic.Int64
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
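// Only /system/info advances the attempt counter; the remaining endpoints
// reuse the phase chosen for the current poll cycle.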
attempt := pollAttempts.Load()
if r.URL.Path == "/api/v2.0/system/info" {
attempt = pollAttempts.Add(1)
switch {
case attempt <= initialSuccessPolls:
_, _ = w.Write([]byte(`{"hostname":"stale-before","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-stale-before"}`))
case attempt <= initialSuccessPolls+failurePolls:
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte(`{"error":"simulated outage"}`))
default:
_, _ = w.Write([]byte(`{"hostname":"stale-after","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86500,"system_serial":"SER-stale-after"}`))
}
return
}
if attempt <= initialSuccessPolls {
switch r.URL.Path {
case "/api/v2.0/pool":
_, _ = w.Write([]byte(`[{"id":1,"name":"before-pool","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
case "/api/v2.0/pool/dataset":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/disk":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/alert/list":
initialSuccesses.Add(1)
_, _ = w.Write([]byte(`[]`))
default:
http.NotFound(w, r)
}
return
}
switch r.URL.Path {
case "/api/v2.0/pool":
_, _ = w.Write([]byte(`[{"id":1,"name":"after-pool","status":"ONLINE","size":1000,"allocated":500,"free":500}]`))
case "/api/v2.0/pool/dataset":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/disk":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/alert/list":
recoverySuccesses.Add(1)
_, _ = w.Write([]byte(`[]`))
default:
http.NotFound(w, r)
}
}))
t.Cleanup(server.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "stale-recovery-conn", server.URL, true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return initialSuccesses.Load() > 0
}, "expected initial successful polls to ingest baseline resources")
waitForCondition(t, 3*time.Second, func() bool {
return pollAttempts.Load() >= initialSuccessPolls+failurePolls
}, "expected poller to continue attempts throughout failure window")
waitForCondition(t, 3*time.Second, func() bool {
return recoverySuccesses.Load() > 0
}, "expected poller to recover and ingest refreshed data after failures")
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "stale-after") {
t.Fatal("expected recovered TrueNAS host data to be ingested")
}
}
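
// TestTrueNASPollerConnectionFlap verifies that the poller keeps issuing
// requests while the endpoint returns 503 and re-ingests data once the
// endpoint comes back under a new identity.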
func TestTrueNASPollerConnectionFlap(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
var requestCount atomic.Int64
var isDown atomic.Bool
var recovered atomic.Bool
var beforeDownSuccesses atomic.Int64
var afterRecoverySuccesses atomic.Int64
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
requestCount.Add(1)
w.Header().Set("Content-Type", "application/json")
if isDown.Load() {
w.WriteHeader(http.StatusServiceUnavailable)
_, _ = w.Write([]byte(`{"error":"temporarily unavailable"}`))
return
}
hostname := "flap-before"
if recovered.Load() {
hostname = "flap-after"
}
switch r.URL.Path {
case "/api/v2.0/system/info":
_, _ = w.Write([]byte(`{"hostname":"` + hostname + `","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-` + hostname + `"}`))
case "/api/v2.0/pool":
_, _ = w.Write([]byte(`[{"id":1,"name":"flap-pool","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
case "/api/v2.0/pool/dataset":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/disk":
_, _ = w.Write([]byte(`[]`))
case "/api/v2.0/alert/list":
if recovered.Load() {
afterRecoverySuccesses.Add(1)
} else {
beforeDownSuccesses.Add(1)
}
_, _ = w.Write([]byte(`[]`))
default:
http.NotFound(w, r)
}
}))
t.Cleanup(server.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "connection-flap-conn", server.URL, true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return beforeDownSuccesses.Load() > 0
}, "expected initial TrueNAS ingest before simulated outage")
isDown.Store(true)
startedDownAt := requestCount.Load()
waitForCondition(t, 2*time.Second, func() bool {
return requestCount.Load() >= startedDownAt+3
}, "expected poller to continue making requests while endpoint is down")
recovered.Store(true)
isDown.Store(false)
waitForCondition(t, 3*time.Second, func() bool {
return afterRecoverySuccesses.Load() > 0
}, "expected poller to recover ingestion after endpoint returns")
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "flap-after") {
t.Fatal("expected recovered endpoint data to be ingested")
}
}
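
// TestTrueNASPollerConcurrentConfigChange verifies that the provider map
// converges as connections are added and removed while the poller is running.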
func TestTrueNASPollerConcurrentConfigChange(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
first := newTrueNASMockServer(t, "config-change-one")
second := newTrueNASMockServer(t, "config-change-two")
t.Cleanup(first.Close)
t.Cleanup(second.Close)
mtp, persistence := newTestTenantPersistence(t)
connOne := trueNASInstanceForServer(t, "config-change-1", first.URL(), true)
connTwo := trueNASInstanceForServer(t, "config-change-2", second.URL(), true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connOne}); err != nil {
t.Fatalf("SaveTrueNASConfig() initial error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && pollerHasProvider(poller, connOne.ID) && first.RequestCount() >= 5
}, "expected first connection to be active before config updates")
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connOne, connTwo}); err != nil {
t.Fatalf("SaveTrueNASConfig() add error = %v", err)
}
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 2 && pollerHasProvider(poller, connOne.ID) && pollerHasProvider(poller, connTwo.ID) && second.RequestCount() >= 5
}, "expected second connection to appear while poller is running")
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connTwo}); err != nil {
t.Fatalf("SaveTrueNASConfig() remove error = %v", err)
}
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && !pollerHasProvider(poller, connOne.ID) && pollerHasProvider(poller, connTwo.ID)
}, "expected provider map to converge after removing first connection")
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "config-change-two") {
t.Fatal("expected second connection resources to be ingested")
}
}
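
// TestTrueNASPollerRebindsProviderWhenConnectionConfigChanges verifies that
// updating a connection's endpoint rebinds its provider: the new endpoint is
// polled, the old one goes quiet, and cached records are replaced.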
func TestTrueNASPollerRebindsProviderWhenConnectionConfigChanges(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
first := newTrueNASMockServer(t, "config-rebind-one")
second := newTrueNASMockServer(t, "config-rebind-two")
t.Cleanup(first.Close)
t.Cleanup(second.Close)
mtp, persistence := newTestTenantPersistence(t)
connection := trueNASInstanceForServer(t, "config-rebind", first.URL(), true)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() initial error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && first.RequestCount() >= 5
}, "expected initial TrueNAS connection to poll before config change")
updated := trueNASInstanceForServer(t, connection.ID, second.URL(), true)
updated.Name = connection.Name
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{updated}); err != nil {
t.Fatalf("SaveTrueNASConfig() updated error = %v", err)
}
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && second.RequestCount() >= 5
}, "expected provider to rebind to updated TrueNAS endpoint")
firstCountAfterRebind := first.RequestCount()
noPollDeadline := time.Now().Add(200 * time.Millisecond)
waitForCondition(t, 500*time.Millisecond, func() bool {
return time.Now().After(noPollDeadline) && first.RequestCount() == firstCountAfterRebind
}, "expected replaced TrueNAS endpoint to stop receiving poll requests")
poller.Stop()
if hasTrueNASHostForOrg(poller, "default", "config-rebind-one") {
t.Fatal("expected old TrueNAS host to be replaced after config rebind")
}
if !hasTrueNASHostForOrg(poller, "default", "config-rebind-two") {
t.Fatal("expected updated TrueNAS host to be ingested after config rebind")
}
}
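
// TestTrueNASPollerSkipsDisabledConnections verifies that disabled connections
// get no provider, receive no requests, and contribute no cached records.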
func TestTrueNASPollerSkipsDisabledConnections(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
enabled := newTrueNASMockServer(t, "nas-enabled")
disabled := newTrueNASMockServer(t, "nas-disabled")
t.Cleanup(enabled.Close)
t.Cleanup(disabled.Close)
mtp, persistence := newTestTenantPersistence(t)
enabledConn := trueNASInstanceForServer(t, "conn-enabled", enabled.URL(), true)
disabledConn := trueNASInstanceForServer(t, "conn-disabled", disabled.URL(), false)
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{enabledConn, disabledConn}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return pollerProviderCount(poller) == 1 && enabled.RequestCount() >= 5
}, "expected only enabled connection provider and resources")
waitForCondition(t, 2*time.Second, func() bool {
return enabled.RequestCount() >= 10
}, "expected additional polling cycles for enabled connection")
if disabled.RequestCount() != 0 {
t.Fatalf("expected disabled connection to be skipped, got %d requests", disabled.RequestCount())
}
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "nas-enabled") {
t.Fatal("expected enabled connection host to be present in cached records")
}
if hasTrueNASHostForOrg(poller, "default", "nas-disabled") {
t.Fatal("expected disabled connection host to be absent from cached records")
}
}
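
// TestTrueNASPollerCachesRecordsPerOrganization verifies that records are
// cached per org so one tenant's hosts never leak into another's records.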
func TestTrueNASPollerCachesRecordsPerOrganization(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
defaultOrgMock := newTrueNASMockServer(t, "default-nas")
tenantMock := newTrueNASMockServer(t, "tenant-nas")
t.Cleanup(defaultOrgMock.Close)
t.Cleanup(tenantMock.Close)
mtp, defaultPersistence := newTestTenantPersistence(t)
defaultConn := trueNASInstanceForServer(t, "default-conn", defaultOrgMock.URL(), true)
if err := defaultPersistence.SaveTrueNASConfig([]config.TrueNASInstance{defaultConn}); err != nil {
t.Fatalf("SaveTrueNASConfig(default) error = %v", err)
}
tenantPersistence, err := mtp.GetPersistence("org-a")
if err != nil {
t.Fatalf("GetPersistence(org-a) error = %v", err)
}
tenantConn := trueNASInstanceForServer(t, "tenant-conn", tenantMock.URL(), true)
if err := tenantPersistence.SaveTrueNASConfig([]config.TrueNASInstance{tenantConn}); err != nil {
t.Fatalf("SaveTrueNASConfig(org-a) error = %v", err)
}
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
t.Cleanup(poller.Stop)
waitForCondition(t, 2*time.Second, func() bool {
return defaultOrgMock.RequestCount() >= 5 &&
tenantMock.RequestCount() >= 5 &&
hasTrueNASHostForOrg(poller, "default", "default-nas") &&
hasTrueNASHostForOrg(poller, "org-a", "tenant-nas")
}, "expected polling for both default and org-a TrueNAS connections and cached records for each org")
poller.Stop()
if !hasTrueNASHostForOrg(poller, "default", "default-nas") {
t.Fatal("expected default org records to include default host")
}
if hasTrueNASHostForOrg(poller, "default", "tenant-nas") {
t.Fatal("expected default org records to exclude tenant host")
}
if !hasTrueNASHostForOrg(poller, "org-a", "tenant-nas") {
t.Fatal("expected tenant records to include tenant host")
}
}
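
// TestTrueNASPollerStopsCleanly verifies that Stop() closes the stopped
// channel promptly.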
func TestTrueNASPollerStopsCleanly(t *testing.T) {
previous := truenas.IsFeatureEnabled()
truenas.SetFeatureEnabled(true)
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
mtp, _ := newTestTenantPersistence(t)
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
poller.Start(context.Background())
poller.Stop()
select {
case <-poller.stopped:
case <-time.After(time.Second):
t.Fatal("expected poller stopped channel to close")
}
}
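
// TestTrueNASPollerSnapshotOwnedSources verifies that the poller reports
// SourceTrueNAS as its sole owned source, both globally and per org.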
func TestTrueNASPollerSnapshotOwnedSources(t *testing.T) {
poller := NewTrueNASPoller(nil, time.Second, nil)
defaultSources := poller.SnapshotOwnedSources()
if len(defaultSources) != 1 || defaultSources[0] != unifiedresources.SourceTrueNAS {
t.Fatalf("default owned sources = %#v, want [%q]", defaultSources, unifiedresources.SourceTrueNAS)
}
orgSources := poller.SnapshotOwnedSourcesForOrg("org-a")
if len(orgSources) != 1 || orgSources[0] != unifiedresources.SourceTrueNAS {
t.Fatalf("org owned sources = %#v, want [%q]", orgSources, unifiedresources.SourceTrueNAS)
}
}
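
// TestTrueNASPollerSupplementalInventoryReadyAtUsesPersistedActiveConnections
// verifies that readiness stays unsettled until the first recorded attempt for
// a persisted active connection, then settles at that attempt's timestamp.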
func TestTrueNASPollerSupplementalInventoryReadyAtUsesPersistedActiveConnections(t *testing.T) {
mtp, persistence := newTestTenantPersistence(t)
connection := config.NewTrueNASInstance()
connection.ID = "conn-ready"
connection.Host = "nas-ready.lab.local"
connection.APIKey = "api-key"
if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
t.Fatalf("SaveTrueNASConfig() error = %v", err)
}
poller := NewTrueNASPoller(mtp, time.Second, nil)
if readyAt, settled := poller.SupplementalInventoryReadyAt(nil, "default"); settled || !readyAt.IsZero() {
t.Fatalf("SupplementalInventoryReadyAt() before any attempt = (%v, %t), want (zero, false)", readyAt, settled)
}
attemptedAt := time.Now().UTC()
poller.RecordConnectionTestSuccess("default", connection.ID, connection, attemptedAt)
readyAt, settled := poller.SupplementalInventoryReadyAt(nil, "default")
if !settled {
t.Fatal("expected readiness to settle after the first recorded attempt")
}
if !readyAt.Equal(attemptedAt) {
t.Fatalf("SupplementalInventoryReadyAt() = %v, want %v", readyAt, attemptedAt)
}
}
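
// TestTrueNASPollerSyncConnectionsLogsStructuredContextWhenPersistenceNil
// verifies that syncConnections emits a structured warning when multi-tenant
// persistence is nil.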
func TestTrueNASPollerSyncConnectionsLogsStructuredContextWhenPersistenceNil(t *testing.T) {
logOutput := captureTrueNASPollerLogs(t)
poller := NewTrueNASPoller(nil, time.Second, nil)
poller.syncConnections()
for _, expected := range []string{
`"level":"warn"`,
`"component":"truenas_poller"`,
`"action":"sync_connections"`,
`"message":"TrueNAS poller cannot sync connections because multi-tenant persistence is nil"`,
} {
if !strings.Contains(logOutput.String(), expected) {
t.Fatalf("expected log output to include %s, got %q", expected, logOutput.String())
}
}
}
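
// TestTrueNASPollerPollAllLogsStructuredContextOnRefreshFailure verifies that
// pollAll logs a structured warning, including the connection ID and wrapped
// error, when a provider refresh fails.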
func TestTrueNASPollerPollAllLogsStructuredContextOnRefreshFailure(t *testing.T) {
logOutput := captureTrueNASPollerLogs(t)
poller := NewTrueNASPoller(nil, time.Second, nil)
poller.mu.Lock()
if poller.configsByOrg == nil {
poller.configsByOrg = make(map[string]map[string]config.TrueNASInstance)
}
if poller.configsByOrg["default"] == nil {
poller.configsByOrg["default"] = make(map[string]config.TrueNASInstance)
}
connection := config.NewTrueNASInstance()
connection.ID = "conn-refresh-fail"
connection.Host = "nas-refresh-fail.lab.local"
connection.APIKey = "api-key"
poller.configsByOrg["default"][connection.ID] = connection
if poller.providersByOrg == nil {
poller.providersByOrg = make(map[string]map[string]*truenas.Provider)
}
if poller.providersByOrg["default"] == nil {
poller.providersByOrg["default"] = make(map[string]*truenas.Provider)
}
poller.providersByOrg["default"][connection.ID] = truenas.NewLiveProvider(failingTrueNASFetcher{err: fmt.Errorf("refresh exploded")})
poller.mu.Unlock()
poller.pollAll(context.Background())
for _, expected := range []string{
`"level":"warn"`,
`"component":"truenas_poller"`,
`"action":"refresh_connection"`,
`"connection_id":"conn-refresh-fail"`,
`"error":"refresh truenas snapshot: refresh exploded"`,
`"message":"TrueNAS poller refresh failed"`,
} {
if !strings.Contains(logOutput.String(), expected) {
t.Fatalf("expected log output to include %s, got %q", expected, logOutput.String())
}
}
}
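
// TestClassifyTrueNASError verifies error classification into auth, timeout,
// connection, and api categories, along with the expected retryability.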
func TestClassifyTrueNASError(t *testing.T) {
tests := []struct {
name string
err error
expectedType string
expectedRetry bool
}{
{
name: "nil error returns nil",
err: nil,
expectedType: "",
},
{
name: "APIError 401 classifies as auth",
err: &truenas.APIError{StatusCode: 401, Method: "GET", Path: "/system/info", Body: "Unauthorized"},
expectedType: "auth",
expectedRetry: false,
},
{
name: "APIError 403 classifies as auth",
err: &truenas.APIError{StatusCode: 403, Method: "GET", Path: "/pool", Body: "Forbidden"},
expectedType: "auth",
expectedRetry: false,
},
{
name: "APIError 500 classifies as api",
err: &truenas.APIError{StatusCode: 500, Method: "GET", Path: "/pool", Body: "Internal Server Error"},
expectedType: "api",
expectedRetry: true,
},
{
name: "APIError 408 classifies as timeout",
err: &truenas.APIError{StatusCode: 408, Method: "GET", Path: "/system/info", Body: "Request Timeout"},
expectedType: "timeout",
expectedRetry: true,
},
{
name: "APIError 504 classifies as timeout",
err: &truenas.APIError{StatusCode: 504, Method: "GET", Path: "/pool", Body: "Gateway Timeout"},
expectedType: "timeout",
expectedRetry: true,
},
{
name: "wrapped APIError 401 classifies as auth",
err: fmt.Errorf("fetch truenas system info: %w", &truenas.APIError{StatusCode: 401, Method: "GET", Path: "/system/info", Body: "Unauthorized"}),
expectedType: "auth",
expectedRetry: false,
},
{
name: "context.DeadlineExceeded classifies as timeout",
err: context.DeadlineExceeded,
expectedType: "timeout",
expectedRetry: true,
},
{
name: "wrapped context.DeadlineExceeded classifies as timeout",
err: fmt.Errorf("fetch truenas system info: %w", context.DeadlineExceeded),
expectedType: "timeout",
expectedRetry: true,
},
{
name: "url.Error with timeout classifies as timeout",
err: &url.Error{Op: "Get", URL: "https://truenas.local/api/v2.0/system/info", Err: context.DeadlineExceeded},
expectedType: "timeout",
expectedRetry: true,
},
{
name: "net.OpError classifies as connection",
err: &net.OpError{Op: "dial", Net: "tcp", Addr: nil, Err: fmt.Errorf("connection refused")},
expectedType: "connection",
expectedRetry: true,
},
{
name: "wrapped net.OpError classifies as connection",
err: fmt.Errorf("truenas request GET /system/info failed: %w", &net.OpError{Op: "dial", Net: "tcp", Addr: nil, Err: fmt.Errorf("connection refused")}),
expectedType: "connection",
expectedRetry: true,
},
{
name: "plain error classifies as api fallback",
err: fmt.Errorf("some unknown error"),
expectedType: "api",
expectedRetry: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := classifyTrueNASError(tt.err, "test-conn")
if tt.err == nil {
if result != nil {
t.Fatalf("expected nil, got %+v", result)
}
return
}
if result == nil {
t.Fatal("expected non-nil MonitorError")
}
if string(result.Type) != tt.expectedType {
t.Errorf("expected type %q, got %q", tt.expectedType, result.Type)
}
if result.Retryable != tt.expectedRetry {
t.Errorf("expected retryable=%v, got %v", tt.expectedRetry, result.Retryable)
}
if result.Instance != "test-conn" {
t.Errorf("expected instance %q, got %q", "test-conn", result.Instance)
}
if result.Op != "truenas_poll" {
t.Errorf("expected op %q, got %q", "truenas_poll", result.Op)
}
})
}
}
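
// trueNASMockServer serves canned TrueNAS v2.0 API responses for a single
// host and counts every request it receives.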
type trueNASMockServer struct {
server *httptest.Server
requests atomic.Int64
}
func newTrueNASMockServer(t *testing.T, hostname string) *trueNASMockServer {
t.Helper()
mock := &trueNASMockServer{}
poolName := "pool-" + hostname
mock.server = httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
mock.requests.Add(1)
writer.Header().Set("Content-Type", "application/json")
switch request.URL.Path {
case "/api/v2.0/system/info":
_, _ = writer.Write([]byte(`{"hostname":"` + hostname + `","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-` + hostname + `"}`))
case "/api/v2.0/pool":
_, _ = writer.Write([]byte(`[{"id":1,"name":"` + poolName + `","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
case "/api/v2.0/pool/dataset":
_, _ = writer.Write([]byte(`[{"id":"` + poolName + `/apps","name":"` + poolName + `/apps","pool":"` + poolName + `","used":{"rawvalue":"12345","parsed":12345},"available":{"rawvalue":"555","parsed":555},"mountpoint":"/mnt/` + poolName + `/apps","readonly":{"rawvalue":"off","parsed":false},"mounted":true}]`))
case "/api/v2.0/disk":
_, _ = writer.Write([]byte(`[{"identifier":"{disk-1}","name":"sda","serial":"SER-A","size":1000000,"model":"Seagate","type":"HDD","pool":"` + poolName + `","bus":"SATA","rotationrate":7200,"status":"ONLINE"}]`))
case "/api/v2.0/alert/list":
_, _ = writer.Write([]byte(`[{"id":"a1","level":"WARNING","formatted":"Disk temp high","source":"DiskService","dismissed":false,"datetime":{"$date":1707400000000}}]`))
default:
http.NotFound(writer, request)
}
}))
return mock
}
func (m *trueNASMockServer) URL() string {
return m.server.URL
}
func (m *trueNASMockServer) Close() {
if m != nil && m.server != nil {
m.server.Close()
}
}
func (m *trueNASMockServer) RequestCount() int64 {
if m == nil {
return 0
}
return m.requests.Load()
}
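
// trueNASInstanceForServer builds a TrueNASInstance pointing at the given test
// server URL, deriving host, port, and HTTPS usage from it.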
func trueNASInstanceForServer(t *testing.T, id string, rawURL string, enabled bool) config.TrueNASInstance {
t.Helper()
parsed, err := url.Parse(rawURL)
if err != nil {
t.Fatalf("url.Parse(%q) error = %v", rawURL, err)
}
port, err := strconv.Atoi(parsed.Port())
if err != nil {
t.Fatalf("parse port from %q error = %v", rawURL, err)
}
return config.TrueNASInstance{
ID: id,
Name: "connection-" + id,
Host: parsed.Hostname(),
Port: port,
APIKey: "test-api-key",
UseHTTPS: strings.EqualFold(parsed.Scheme, "https"),
Enabled: enabled,
}
}
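
// waitForCondition polls condition every 10ms until it returns true or the
// timeout elapses, failing the test with failureMessage on timeout.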
func waitForCondition(t *testing.T, timeout time.Duration, condition func() bool, failureMessage string) {
t.Helper()
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
if condition() {
return
}
time.Sleep(10 * time.Millisecond)
}
t.Fatal(failureMessage)
}
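
// newTestTenantPersistence returns a multi-tenant persistence rooted in a temp
// dir along with the persistence handle for the "default" org.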
func newTestTenantPersistence(t *testing.T) (*config.MultiTenantPersistence, *config.ConfigPersistence) {
t.Helper()
mtp := config.NewMultiTenantPersistence(t.TempDir())
persistence, err := mtp.GetPersistence("default")
if err != nil {
t.Fatalf("GetPersistence(default) error = %v", err)
}
return mtp, persistence
}
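
// hasTrueNASHostForOrg reports whether the poller's cached records for orgID
// ingest into a registry containing a TrueNAS-sourced agent named hostname.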
func hasTrueNASHostForOrg(poller *TrueNASPoller, orgID, hostname string) bool {
if poller == nil {
return false
}
records := poller.GetCurrentRecordsForOrg(orgID)
if len(records) == 0 {
return false
}
registry := unifiedresources.NewRegistry(nil)
registry.IngestRecords(unifiedresources.SourceTrueNAS, records)
return hasTrueNASHost(registry, hostname)
}
func hasTrueNASHost(registry *unifiedresources.ResourceRegistry, hostname string) bool {
if registry == nil {
return false
}
resources := registry.List()
for _, resource := range resources {
if resource.Type != unifiedresources.ResourceTypeAgent || resource.Name != hostname {
continue
}
if resourceHasSource(resource, unifiedresources.SourceTrueNAS) {
return true
}
}
return false
}
func resourceHasSource(resource unifiedresources.Resource, source unifiedresources.DataSource) bool {
for _, candidate := range resource.Sources {
if candidate == source {
return true
}
}
return false
}
func pollerProviderCount(poller *TrueNASPoller) int {
if poller == nil {
return 0
}
poller.mu.Lock()
defer poller.mu.Unlock()
total := 0
for _, providers := range poller.providersByOrg {
total += len(providers)
}
return total
}
func pollerHasProvider(poller *TrueNASPoller, id string) bool {
if poller == nil {
return false
}
poller.mu.Lock()
defer poller.mu.Unlock()
providers := poller.providersByOrg["default"]
if providers == nil {
return false
}
_, ok := providers[id]
return ok
}
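
// injectTrueNASProviderTimeout pre-binds a live provider whose HTTP client
// uses the given timeout, so early poll cycles exercise client-side timeouts.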
func injectTrueNASProviderTimeout(t *testing.T, poller *TrueNASPoller, instance config.TrueNASInstance, timeout time.Duration) {
t.Helper()
client, err := truenas.NewClient(truenas.ClientConfig{
Host: instance.Host,
Port: instance.Port,
APIKey: instance.APIKey,
Username: instance.Username,
Password: instance.Password,
UseHTTPS: instance.UseHTTPS,
InsecureSkipVerify: instance.InsecureSkipVerify,
Fingerprint: instance.Fingerprint,
Timeout: timeout,
})
if err != nil {
t.Fatalf("NewClient() error = %v", err)
}
poller.mu.Lock()
defer poller.mu.Unlock()
if poller.providersByOrg == nil {
poller.providersByOrg = make(map[string]map[string]*truenas.Provider)
}
if poller.providersByOrg["default"] == nil {
poller.providersByOrg["default"] = make(map[string]*truenas.Provider)
}
poller.providersByOrg["default"][instance.ID] = truenas.NewLiveProvider(&truenas.APIFetcher{Client: client})
}
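
// captureTrueNASPollerLogs swaps the global zerolog logger for one writing to
// a buffer and restores the original on cleanup.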
func captureTrueNASPollerLogs(t *testing.T) *bytes.Buffer {
t.Helper()
var buf bytes.Buffer
origLogger := log.Logger
log.Logger = zerolog.New(&buf).Level(zerolog.DebugLevel).With().Timestamp().Logger()
t.Cleanup(func() {
log.Logger = origLogger
})
return &buf
}
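
// failingTrueNASFetcher always returns its configured error from Fetch.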
type failingTrueNASFetcher struct {
err error
}
func (f failingTrueNASFetcher) Fetch(context.Context) (*truenas.FixtureSnapshot, error) {
return nil, f.err
}