mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-08 09:53:25 +00:00
1704 lines
60 KiB
Go
1704 lines
60 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/config"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/truenas"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
|
|
"github.com/rs/zerolog"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// TestTrueNASPollerPollsConfiguredConnections verifies that a poller started
// against a persisted TrueNAS connection actually polls the mock backend and
// ingests the host's resources for the default org.
func TestTrueNASPollerPollsConfiguredConnections(t *testing.T) {
	// Enable the TrueNAS feature flag for the test and restore the previous
	// value afterwards so other tests see their expected state.
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	mock := newTrueNASMockServer(t, "nas-one")
	t.Cleanup(mock.Close)

	// Persist one enabled connection pointing at the mock server.
	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "conn-1", mock.URL(), true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	// Short interval so poll cycles complete well inside the wait timeout.
	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// A poll cycle hits multiple API endpoints, so >= 5 requests indicates at
	// least one full cycle; also require the host resource to be ingested.
	waitForCondition(t, 2*time.Second, func() bool {
		return mock.RequestCount() >= 5 && hasTrueNASHostForOrg(poller, "default", "nas-one")
	}, "expected configured TrueNAS connection to poll and ingest host resources")

	// Stop first so the cached state is stable for the final assertion.
	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "nas-one") {
		t.Fatal("expected TrueNAS resources to be ingested")
	}
}
|
|
|
|
// TestTrueNASPollerFeatureFlagGate verifies that Start() is a complete no-op
// while the TrueNAS feature flag is disabled: no cancel func is installed, the
// stopped channel is untouched, and no HTTP requests reach the backend.
func TestTrueNASPollerFeatureFlagGate(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(false)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	mock := newTrueNASMockServer(t, "nas-feature-flag-off")
	t.Cleanup(mock.Close)

	// A connection is configured even though the flag is off, to prove the
	// gate — not a missing config — is what prevents polling.
	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "feature-flag-off-conn", mock.URL(), true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	// Capture the stopped channel before Start so we can prove it was not replaced.
	initialStopped := poller.stopped

	poller.Start(context.Background())

	if poller.cancel != nil {
		t.Fatal("expected Start() to be a no-op with feature flag disabled")
	}
	if poller.stopped != initialStopped {
		t.Fatal("expected stopped channel to remain unchanged when Start() is gated")
	}
	// The stopped channel should already be closed (a receive succeeds
	// immediately instead of hitting the default branch).
	select {
	case <-poller.stopped:
	default:
		t.Fatal("expected stopped channel to remain pre-closed when Start() is gated")
	}

	// Give any erroneously-started poll loop time to fire, then confirm the
	// mock never saw a single request.
	noPollDeadline := time.Now().Add(200 * time.Millisecond)
	waitForCondition(t, 500*time.Millisecond, func() bool {
		return time.Now().After(noPollDeadline) && mock.RequestCount() == 0
	}, "expected no TrueNAS polling requests when feature flag is disabled")

	poller.Stop()
}
|
|
|
|
// TestTrueNASPollerEnableDisableCycle verifies that a poller which ran while
// the feature flag was enabled cannot be restarted once the flag is turned
// off: Start() becomes a no-op, no new requests are issued, and the cached
// record set for the org does not grow.
func TestTrueNASPollerEnableDisableCycle(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	mock := newTrueNASMockServer(t, "nas-enable-disable")
	t.Cleanup(mock.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "enable-disable-conn", mock.URL(), true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// Enabled phase: the provider should be registered and actively polling.
	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && pollerHasProvider(poller, connection.ID) && mock.RequestCount() >= 5
	}, "expected enabled poller to start and poll configured TrueNAS connection")

	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "nas-enable-disable") {
		t.Fatal("expected enabled poller to ingest TrueNAS resources")
	}

	// Capture baselines so the disabled phase can prove nothing changed.
	requestCountAfterStop := mock.RequestCount()
	recordCountAfterStop := len(poller.GetCurrentRecordsForOrg("default"))

	// Disabled phase: flipping the flag off must gate a subsequent Start().
	truenas.SetFeatureEnabled(false)
	poller.Start(context.Background())

	if poller.cancel != nil {
		t.Fatal("expected Start() to remain a no-op after disable without restarting process")
	}

	// Verify the request count stays flat for a short window after disable.
	noPollDeadline := time.Now().Add(200 * time.Millisecond)
	waitForCondition(t, 500*time.Millisecond, func() bool {
		return time.Now().After(noPollDeadline) && mock.RequestCount() == requestCountAfterStop
	}, "expected no additional polling requests after disable and restart attempt")

	if got := len(poller.GetCurrentRecordsForOrg("default")); got != recordCountAfterStop {
		t.Fatalf("expected no new records after disable restart attempt, got before=%d after=%d", recordCountAfterStop, got)
	}
}
|
|
|
|
// TestTrueNASPollerKillSwitchAllConnectionsRemoved verifies that persisting an
// empty connection list while the poller is running drains every provider and
// halts all polling without stopping the poller itself.
func TestTrueNASPollerKillSwitchAllConnectionsRemoved(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	mock := newTrueNASMockServer(t, "nas-kill-switch")
	t.Cleanup(mock.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "kill-switch-conn", mock.URL(), true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// Ensure the connection is live and polling before flipping the switch.
	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && pollerHasProvider(poller, connection.ID) && mock.RequestCount() >= 5
	}, "expected initial TrueNAS connection to be active and polling")

	// Kill switch: persist an empty config while the poller is running.
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{}); err != nil {
		t.Fatalf("SaveTrueNASConfig() clear error = %v", err)
	}

	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 0
	}, "expected all TrueNAS providers to be drained after removing all connections")

	if pollerHasProvider(poller, connection.ID) {
		t.Fatalf("expected provider %q to be removed after kill-switch config update", connection.ID)
	}

	// Prove the request count stays flat for a window after the drain.
	requestCountAfterDrain := mock.RequestCount()
	noPollDeadline := time.Now().Add(200 * time.Millisecond)
	waitForCondition(t, 500*time.Millisecond, func() bool {
		return time.Now().After(noPollDeadline) && mock.RequestCount() == requestCountAfterDrain
	}, "expected no further polling after all TrueNAS connections are removed")

	poller.Stop()
}
|
|
|
|
// TestTrueNASPollerRecordsMetrics runs the poller against a server that fails
// its first three requests with HTTP 500 and then serves normally, exercising
// both the error and success request-accounting paths.
func TestTrueNASPollerRecordsMetrics(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	// Counters are atomic because the handler runs on server goroutines
	// concurrently with the test goroutine's reads.
	var requestCount atomic.Int64
	var errorCount atomic.Int64
	var successCount atomic.Int64
	var remainingFailures atomic.Int64
	remainingFailures.Store(3) // fail the first three requests

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		requestCount.Add(1)
		w.Header().Set("Content-Type", "application/json")

		// Failure injection: serve HTTP 500 until the budget is spent.
		if remainingFailures.Load() > 0 {
			remainingFailures.Add(-1)
			errorCount.Add(1)
			w.WriteHeader(http.StatusInternalServerError)
			_, _ = w.Write([]byte(`{"error":"simulated failure"}`))
			return
		}

		successCount.Add(1)
		switch r.URL.Path {
		case "/api/v2.0/system/info":
			_, _ = w.Write([]byte(`{"hostname":"metrics-host","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-001"}`))
		case "/api/v2.0/pool":
			_, _ = w.Write([]byte(`[{"id":1,"name":"tank","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
		case "/api/v2.0/pool/dataset":
			_, _ = w.Write([]byte(`[]`))
		case "/api/v2.0/disk":
			_, _ = w.Write([]byte(`[]`))
		case "/api/v2.0/alert/list":
			_, _ = w.Write([]byte(`[]`))
		default:
			http.NotFound(w, r)
		}
	}))
	t.Cleanup(server.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "metrics-conn", server.URL, true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// The poller must recover from the injected failures and ingest the host.
	waitForCondition(t, 5*time.Second, func() bool {
		return successCount.Load() > 0 && hasTrueNASHostForOrg(poller, "default", "metrics-host")
	}, "expected TrueNAS resources to appear after initial failures")

	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "metrics-host") {
		t.Fatal("expected TrueNAS resources to appear after initial failures")
	}

	if errorCount.Load() == 0 {
		t.Fatal("expected at least one failed request to exercise metrics error path")
	}
	if successCount.Load() == 0 {
		t.Fatal("expected successful requests to exercise metrics success path")
	}
	// Each request is classified as exactly one of error/success, so the
	// total must be at least their sum.
	if requestCount.Load() < errorCount.Load()+successCount.Load() {
		t.Fatalf("unexpected request accounting: total=%d errors=%d successes=%d", requestCount.Load(), errorCount.Load(), successCount.Load())
	}
}
|
|
|
|
// TestTrueNASPollerConnectionSummariesExposeObservedCounts verifies that a
// successful poll surfaces a per-connection summary carrying the effective
// poll interval plus observed host identity and resource counts.
func TestTrueNASPollerConnectionSummariesExposeObservedCounts(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	mock := newTrueNASMockServer(t, "summary-host")
	t.Cleanup(mock.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "summary-conn", mock.URL(), true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// Wait for the summary to reflect at least one successful poll with
	// observed counts attached.
	waitForCondition(t, 2*time.Second, func() bool {
		summaries := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})
		summary, ok := summaries[connection.ID]
		return ok && summary.Poll != nil && summary.Poll.LastSuccessAt != nil && summary.Observed != nil
	}, "expected connection summary to include successful poll and observed counts")

	summary := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})[connection.ID]
	// 60 appears to be the default reported interval for this connection —
	// TODO confirm against config.TrueNASInstance.ApplyDefaults.
	if summary.Poll == nil || summary.Poll.IntervalSeconds != 60 {
		t.Fatalf("expected poll interval summary 60 seconds, got %+v", summary.Poll)
	}
	if summary.Observed == nil {
		t.Fatal("expected observed summary to be present")
	}
	if summary.Observed.Host != "summary-host" || summary.Observed.ResourceID != "summary-host" {
		t.Fatalf("unexpected observed host identity: %+v", summary.Observed)
	}
	if summary.Observed.Systems != 1 || summary.Observed.StoragePools != 1 || summary.Observed.Datasets != 1 || summary.Observed.Disks != 1 {
		t.Fatalf("unexpected observed counts: %+v", summary.Observed)
	}
}
|
|
|
|
// TestTrueNASPollerConnectionSummariesCaptureFailures verifies that repeated
// HTTP 401 responses are surfaced in the connection summary as an
// "auth"-category error with a non-zero consecutive-failure count.
func TestTrueNASPollerConnectionSummariesCaptureFailures(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	// Every request is rejected with 401 to simulate a bad API key.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusUnauthorized)
		_, _ = w.Write([]byte(`{"error":"unauthorized"}`))
	}))
	t.Cleanup(server.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "summary-fail", server.URL, true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// Wait until the summary reflects the failing poll state.
	waitForCondition(t, 2*time.Second, func() bool {
		summaries := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})
		summary, ok := summaries[connection.ID]
		return ok && summary.Poll != nil && summary.Poll.LastError != nil && summary.Poll.ConsecutiveFailures > 0
	}, "expected connection summary to capture poll failure state")

	summary := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})[connection.ID]
	if summary.Poll == nil || summary.Poll.LastError == nil {
		t.Fatalf("expected poll failure summary, got %+v", summary.Poll)
	}
	if summary.Poll.LastError.Category != "auth" {
		t.Fatalf("expected auth error category, got %+v", summary.Poll.LastError)
	}
}
|
|
|
|
// TestTrueNASPollerManualConnectionTestsUpdateSummariesWithoutClearingObservedCounts
// verifies that recording a manual connection-test success clears a previous
// poll error while preserving the observed resource counts captured by the
// last real poll success.
func TestTrueNASPollerManualConnectionTestsUpdateSummariesWithoutClearingObservedCounts(t *testing.T) {
	poller := NewTrueNASPoller(nil, time.Minute, nil)
	connection := config.TrueNASInstance{
		ID:               "manual-test-conn",
		Host:             "manual-test.local",
		APIKey:           "secret",
		UseHTTPS:         true,
		Enabled:          true,
		PollIntervalSecs: 120,
	}
	connection.ApplyDefaults()

	// Snapshot backing the recorded success: one host with a single pool.
	snapshot := &truenas.FixtureSnapshot{
		System: truenas.SystemInfo{
			Hostname: "manual-test",
		},
		Pools: []truenas.Pool{{Name: "tank"}},
	}
	// Timeline: poll success, then a failure, then a manual test success.
	firstSuccess := time.Date(2026, time.March, 30, 10, 0, 0, 0, time.UTC)
	failureAt := firstSuccess.Add(2 * time.Minute)
	manualSuccessAt := failureAt.Add(2 * time.Minute)

	// Seed internal state directly; the *Locked helpers require mu to be held.
	poller.mu.Lock()
	poller.recordConnectionSuccessLocked("default", connection.ID, connection, firstSuccess, snapshot)
	poller.recordConnectionFailureLocked("default", connection.ID, connection, errors.New("manual auth failed"), failureAt)
	poller.mu.Unlock()

	poller.RecordConnectionTestSuccess("default", connection.ID, connection, manualSuccessAt)

	summary := poller.ConnectionSummaries("default", []config.TrueNASInstance{connection})[connection.ID]
	if summary.Poll == nil || summary.Poll.LastSuccessAt == nil {
		t.Fatalf("expected manual success to update poll summary, got %+v", summary.Poll)
	}
	if summary.Poll.LastError != nil {
		t.Fatalf("expected manual success to clear previous error, got %+v", summary.Poll.LastError)
	}
	// Observed counts came from the earlier poll success and must survive the
	// manual test (which carries no snapshot of its own).
	if summary.Observed == nil || summary.Observed.Host != "manual-test" || summary.Observed.StoragePools != 1 {
		t.Fatalf("expected observed summary to be preserved after manual success, got %+v", summary.Observed)
	}
}
|
|
|
|
// TestTrueNASPollerHonorsConfiguredPollInterval verifies that a
// per-connection PollIntervalSecs of 1 is honored: the first poll is
// immediate, no repoll occurs within a ~400ms window, and the next poll
// arrives well before the 60s default would fire.
func TestTrueNASPollerHonorsConfiguredPollInterval(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	mock := newTrueNASMockServer(t, "interval-host")
	t.Cleanup(mock.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "interval-conn", mock.URL(), true)
	connection.PollIntervalSecs = 1 // much shorter than the 60s default
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	// Zero base interval forces the poller to use the per-connection value.
	poller := NewTrueNASPoller(mtp, 0, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	waitForCondition(t, 2*time.Second, func() bool {
		return mock.RequestCount() >= 5
	}, "expected initial immediate poll for configured TrueNAS connection")

	// With a 1s interval there must be no extra poll inside a 400ms window.
	requestCountAfterFirstPoll := mock.RequestCount()
	time.Sleep(400 * time.Millisecond)
	if got := mock.RequestCount(); got != requestCountAfterFirstPoll {
		t.Fatalf("expected configured 1s poll interval to avoid an early repoll, got before=%d after=%d", requestCountAfterFirstPoll, got)
	}

	// The next cycle must come from the 1s per-connection interval, i.e.
	// within the 2s wait — far sooner than the 60s default.
	waitForCondition(t, 2*time.Second, func() bool {
		return mock.RequestCount() > requestCountAfterFirstPoll
	}, "expected configured 1s poll interval to trigger the next poll without waiting for the 60s default")
}
|
|
|
|
// TestTrueNASPollerPhysicalDiskTemperatureHistoryUsesTenantScopedProvider
// verifies that disk temperature history is served from the provider
// registered for the requested org and keyed by the canonical metric
// resource ID rather than the raw disk identifier.
func TestTrueNASPollerPhysicalDiskTemperatureHistoryUsesTenantScopedProvider(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	fixtures := truenas.DefaultFixtures()
	now := time.Date(2026, 3, 29, 20, 0, 0, 0, time.UTC)
	// Canned series keyed by the raw disk name "sda"; the poller is expected
	// to expose it under the canonical resource ID instead.
	fetcher := &controllableTrueNASHistoryFetcher{
		snapshot: &fixtures,
		history: map[string][]truenas.TimeSeriesPoint{
			"sda": {
				{Timestamp: now.Add(-2 * time.Hour), Value: 30},
				{Timestamp: now.Add(-1 * time.Hour), Value: 32},
				{Timestamp: now, Value: 34},
			},
		},
	}
	provider := truenas.NewLiveProvider(fetcher)
	if err := provider.Refresh(context.Background()); err != nil {
		t.Fatalf("Refresh() error = %v", err)
	}

	// Register the provider directly under the "default" org, bypassing the
	// config-driven lifecycle.
	poller := NewTrueNASPoller(nil, time.Minute, nil)
	poller.providersByOrg["default"] = map[string]*truenas.Provider{
		"conn-1": provider,
	}

	history := poller.PhysicalDiskTemperatureHistory(nil, "default", 4*time.Hour)
	// "ZL0A1234" is presumably the fixture disk's serial acting as the
	// canonical metric resource ID — confirm against truenas.DefaultFixtures.
	points, ok := history["ZL0A1234"]
	if !ok {
		t.Fatalf("expected canonical metric resource id ZL0A1234, got %#v", history)
	}
	if len(points) != 3 || points[len(points)-1].Value != 34 {
		t.Fatalf("unexpected tenant-scoped disk history: %+v", points)
	}
}
|
|
|
|
// TestTrueNASPollerGuestMetricHistoryUsesTenantScopedProvider verifies that
// guest metric history for the "agent" kind is served from the org's provider
// and keyed by the canonical agent metric ID, with cpu and memory series
// passed through intact.
func TestTrueNASPollerGuestMetricHistoryUsesTenantScopedProvider(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	fixtures := truenas.DefaultFixtures()
	now := time.Date(2026, 3, 29, 20, 0, 0, 0, time.UTC)
	fetcher := &controllableTrueNASHistoryFetcher{
		snapshot: &fixtures,
		systemHistory: &truenas.SystemMetricHistory{
			CPUPercent: []truenas.TimeSeriesPoint{
				{Timestamp: now.Add(-2 * time.Hour), Value: 20},
				{Timestamp: now, Value: 34},
			},
			MemoryPercent: []truenas.TimeSeriesPoint{
				{Timestamp: now.Add(-2 * time.Hour), Value: 45},
				{Timestamp: now, Value: 62},
			},
		},
	}
	provider := truenas.NewLiveProvider(fetcher)
	if err := provider.Refresh(context.Background()); err != nil {
		t.Fatalf("Refresh() error = %v", err)
	}

	// Register the provider directly under the "default" org.
	poller := NewTrueNASPoller(nil, time.Minute, nil)
	poller.providersByOrg["default"] = map[string]*truenas.Provider{
		"conn-1": provider,
	}

	history := poller.GuestMetricHistory(nil, "default", "agent", 4*time.Hour)
	// "truenas-main" is presumably the fixture host's canonical agent metric
	// ID — confirm against truenas.DefaultFixtures.
	metricMap, ok := history["truenas-main"]
	if !ok {
		t.Fatalf("expected canonical agent metric id truenas-main, got %#v", history)
	}
	if len(metricMap["cpu"]) != 2 || metricMap["cpu"][1].Value != 34 {
		t.Fatalf("unexpected cpu history: %+v", metricMap["cpu"])
	}
	if len(metricMap["memory"]) != 2 || metricMap["memory"][1].Value != 62 {
		t.Fatalf("unexpected memory history: %+v", metricMap["memory"])
	}
}
|
|
|
|
// controllableTrueNASHistoryFetcher is a test double for the provider's
// fetcher interface, serving canned snapshot and metric-history data.
type controllableTrueNASHistoryFetcher struct {
	snapshot      *truenas.FixtureSnapshot             // snapshot returned (copied) by Fetch
	history       map[string][]truenas.TimeSeriesPoint // per-disk temperature series keyed by disk identifier
	systemHistory *truenas.SystemMetricHistory         // series returned (copied) by SystemMetricHistory
}
|
|
|
|
func (s *controllableTrueNASHistoryFetcher) Fetch(context.Context) (*truenas.FixtureSnapshot, error) {
|
|
if s == nil || s.snapshot == nil {
|
|
return nil, nil
|
|
}
|
|
copied := *s.snapshot
|
|
copied.Disks = append([]truenas.Disk(nil), s.snapshot.Disks...)
|
|
copied.Pools = append([]truenas.Pool(nil), s.snapshot.Pools...)
|
|
copied.Datasets = append([]truenas.Dataset(nil), s.snapshot.Datasets...)
|
|
copied.Alerts = append([]truenas.Alert(nil), s.snapshot.Alerts...)
|
|
copied.Apps = append([]truenas.App(nil), s.snapshot.Apps...)
|
|
copied.ZFSSnapshots = append([]truenas.ZFSSnapshot(nil), s.snapshot.ZFSSnapshots...)
|
|
copied.ReplicationTasks = append([]truenas.ReplicationTask(nil), s.snapshot.ReplicationTasks...)
|
|
return &copied, nil
|
|
}
|
|
|
|
func (s *controllableTrueNASHistoryFetcher) DiskTemperatureHistory(_ context.Context, identifiers []string, _ time.Duration) (map[string][]truenas.TimeSeriesPoint, error) {
|
|
result := make(map[string][]truenas.TimeSeriesPoint)
|
|
for _, identifier := range identifiers {
|
|
points, ok := s.history[identifier]
|
|
if !ok || len(points) == 0 {
|
|
continue
|
|
}
|
|
copied := make([]truenas.TimeSeriesPoint, len(points))
|
|
copy(copied, points)
|
|
result[identifier] = copied
|
|
}
|
|
if len(result) == 0 {
|
|
return nil, nil
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func (s *controllableTrueNASHistoryFetcher) SystemMetricHistory(context.Context, time.Duration) (*truenas.SystemMetricHistory, error) {
|
|
if s == nil || s.systemHistory == nil {
|
|
return nil, nil
|
|
}
|
|
copied := *s.systemHistory
|
|
copied.CPUPercent = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.CPUPercent...)
|
|
copied.MemoryPercent = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.MemoryPercent...)
|
|
copied.MemoryUsedBytes = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.MemoryUsedBytes...)
|
|
copied.MemoryAvailableBytes = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.MemoryAvailableBytes...)
|
|
copied.MemoryTotalBytes = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.MemoryTotalBytes...)
|
|
copied.NetInRate = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.NetInRate...)
|
|
copied.NetOutRate = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.NetOutRate...)
|
|
copied.DiskReadRate = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.DiskReadRate...)
|
|
copied.DiskWriteRate = append([]truenas.TimeSeriesPoint(nil), s.systemHistory.DiskWriteRate...)
|
|
return &copied, nil
|
|
}
|
|
|
|
// pollerControlFetcher is a test double that records app control and log-read
// calls while serving deep copies of a fixture snapshot.
type pollerControlFetcher struct {
	snapshot   *truenas.FixtureSnapshot // backing fixture; mutated by StartApp/StopApp
	startCalls []string                 // app IDs passed to StartApp, in call order
	stopCalls  []string                 // app IDs passed to StopApp, in call order
	logReads   []pollerLogReadCall      // arguments captured by ReadAppLogs
}
|
|
|
|
// pollerLogReadCall captures the arguments of one ReadAppLogs invocation.
type pollerLogReadCall struct {
	appName     string
	containerID string
	tailLines   int
}
|
|
|
|
func (f *pollerControlFetcher) Fetch(context.Context) (*truenas.FixtureSnapshot, error) {
|
|
if f == nil {
|
|
return nil, nil
|
|
}
|
|
return copyTrueNASSnapshot(f.snapshot), nil
|
|
}
|
|
|
|
func (f *pollerControlFetcher) StartApp(_ context.Context, appID string) error {
|
|
f.startCalls = append(f.startCalls, appID)
|
|
for i := range f.snapshot.Apps {
|
|
if f.snapshot.Apps[i].ID == appID {
|
|
f.snapshot.Apps[i].State = "RUNNING"
|
|
if len(f.snapshot.Apps[i].Containers) > 0 {
|
|
f.snapshot.Apps[i].Containers[0].State = "running"
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *pollerControlFetcher) StopApp(_ context.Context, appID string) error {
|
|
f.stopCalls = append(f.stopCalls, appID)
|
|
for i := range f.snapshot.Apps {
|
|
if f.snapshot.Apps[i].ID == appID {
|
|
f.snapshot.Apps[i].State = "STOPPED"
|
|
if len(f.snapshot.Apps[i].Containers) > 0 {
|
|
f.snapshot.Apps[i].Containers[0].State = "stopped"
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *pollerControlFetcher) ReadAppLogs(_ context.Context, appName, containerID string, tailLines int) ([]truenas.AppLogLine, error) {
|
|
f.logReads = append(f.logReads, pollerLogReadCall{
|
|
appName: appName,
|
|
containerID: containerID,
|
|
tailLines: tailLines,
|
|
})
|
|
return []truenas.AppLogLine{
|
|
{Timestamp: "2026-03-29T18:00:00Z", Data: "ready"},
|
|
}, nil
|
|
}
|
|
|
|
// TestTrueNASPollerControlAppRefreshesCachedRecords verifies that ControlApp
// starts a stopped app through the tenant-scoped provider, returns the
// updated app state, and refreshes the org's cached ingest records.
func TestTrueNASPollerControlAppRefreshesCachedRecords(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	// Force the nextcloud fixture app into a STOPPED state so the "start"
	// control has a visible effect.
	fixtures := truenas.DefaultFixtures()
	for i := range fixtures.Apps {
		if fixtures.Apps[i].ID == "nextcloud" {
			fixtures.Apps[i].State = "STOPPED"
			if len(fixtures.Apps[i].Containers) > 0 {
				fixtures.Apps[i].Containers[0].State = "stopped"
			}
		}
	}

	fetcher := &pollerControlFetcher{snapshot: &fixtures}
	provider := truenas.NewLiveProvider(fetcher)
	if err := provider.Refresh(context.Background()); err != nil {
		t.Fatalf("Refresh() error = %v", err)
	}

	// Wire the provider and its records directly into the poller's org maps,
	// bypassing the config-driven lifecycle.
	poller := NewTrueNASPoller(nil, 0, nil)
	poller.providersByOrg["default"] = map[string]*truenas.Provider{"conn-1": provider}
	poller.cachedRecordsByOrg["default"] = map[string][]unifiedresources.IngestRecord{"conn-1": provider.Records()}

	app, err := poller.ControlApp(context.Background(), "default", "truenas-main", "nextcloud", "start")
	if err != nil {
		t.Fatalf("ControlApp() error = %v", err)
	}
	if app == nil || app.State != "RUNNING" {
		t.Fatalf("expected RUNNING app after control, got %+v", app)
	}
	// The fetcher must have received exactly one start call for this app.
	if len(fetcher.startCalls) != 1 || fetcher.startCalls[0] != "nextcloud" {
		t.Fatalf("expected start call for nextcloud, got %+v", fetcher.startCalls)
	}
	if got := len(poller.cachedRecordsByOrg["default"]["conn-1"]); got == 0 {
		t.Fatal("expected refreshed cached records after app control")
	}
}
|
|
|
|
// TestTrueNASPollerReadAppLogsUsesTenantScopedProvider verifies that app log
// reads resolve the app and its primary container through the provider
// registered for the org and forward the tail-line count to the fetcher.
func TestTrueNASPollerReadAppLogsUsesTenantScopedProvider(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	fixtures := truenas.DefaultFixtures()
	fetcher := &pollerControlFetcher{snapshot: &fixtures}
	provider := truenas.NewLiveProvider(fetcher)
	if err := provider.Refresh(context.Background()); err != nil {
		t.Fatalf("Refresh() error = %v", err)
	}

	// Wire the provider directly under the "default" org.
	poller := NewTrueNASPoller(nil, 0, nil)
	poller.providersByOrg["default"] = map[string]*truenas.Provider{"conn-1": provider}
	poller.cachedRecordsByOrg["default"] = map[string][]unifiedresources.IngestRecord{"conn-1": provider.Records()}

	// An empty container ID should resolve to the app's primary container.
	result, err := poller.ReadAppLogs(context.Background(), "default", "truenas-main", "nextcloud", "", 20)
	if err != nil {
		t.Fatalf("ReadAppLogs() error = %v", err)
	}
	if result == nil || result.App.Name != "Nextcloud" {
		t.Fatalf("expected Nextcloud log result, got %+v", result)
	}
	if result.Container.ID != "nextcloud-web-1" {
		t.Fatalf("expected canonical primary container, got %+v", result.Container)
	}
	if len(fetcher.logReads) != 1 {
		t.Fatalf("expected one log read, got %+v", fetcher.logReads)
	}
	if call := fetcher.logReads[0]; call.appName != "nextcloud" || call.containerID != "nextcloud-web-1" || call.tailLines != 20 {
		t.Fatalf("unexpected log read call: %+v", call)
	}
}
|
|
|
|
// TestTrueNASPollerGetAppConfigUsesTenantScopedProvider verifies that app
// config lookups resolve through the org's provider and return the canonical
// app shape together with the owning host name.
func TestTrueNASPollerGetAppConfigUsesTenantScopedProvider(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	fixtures := truenas.DefaultFixtures()
	fetcher := &pollerControlFetcher{snapshot: &fixtures}
	provider := truenas.NewLiveProvider(fetcher)
	if err := provider.Refresh(context.Background()); err != nil {
		t.Fatalf("Refresh() error = %v", err)
	}

	// Wire the provider directly under the "default" org.
	poller := NewTrueNASPoller(nil, 0, nil)
	poller.providersByOrg["default"] = map[string]*truenas.Provider{"conn-1": provider}
	poller.cachedRecordsByOrg["default"] = map[string][]unifiedresources.IngestRecord{"conn-1": provider.Records()}

	result, err := poller.GetAppConfig(context.Background(), "default", "truenas-main", "nextcloud")
	if err != nil {
		t.Fatalf("GetAppConfig() error = %v", err)
	}
	if result == nil || result.App.Name != "Nextcloud" {
		t.Fatalf("expected Nextcloud config result, got %+v", result)
	}
	if result.Host != "truenas-main" {
		t.Fatalf("expected config host truenas-main, got %+v", result)
	}
	// The fixture's nextcloud app appears to ship two containers — confirm
	// against truenas.DefaultFixtures.
	if len(result.App.Containers) != 2 {
		t.Fatalf("expected canonical app runtime shape, got %+v", result.App.Containers)
	}
}
|
|
|
|
// TestTrueNASPollerHandlesConnectionAddRemove verifies that the poller picks
// up connections added to the persisted config at runtime and prunes the
// provider (and its ingested resources) for connections that are removed.
func TestTrueNASPollerHandlesConnectionAddRemove(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	first := newTrueNASMockServer(t, "nas-one")
	second := newTrueNASMockServer(t, "nas-two")
	t.Cleanup(first.Close)
	t.Cleanup(second.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connOne := trueNASInstanceForServer(t, "conn-1", first.URL(), true)
	connTwo := trueNASInstanceForServer(t, "conn-2", second.URL(), true)
	// Start with only the first connection configured.
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connOne}); err != nil {
		t.Fatalf("SaveTrueNASConfig() initial error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && first.RequestCount() >= 5
	}, "expected first connection provider and successful poll cycle")

	// Add the second connection while the poller is running.
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connOne, connTwo}); err != nil {
		t.Fatalf("SaveTrueNASConfig() add error = %v", err)
	}

	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 2 && second.RequestCount() >= 5
	}, "expected second connection to be discovered and polled")

	// Remove the first connection; only conn-2 should remain afterwards.
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connTwo}); err != nil {
		t.Fatalf("SaveTrueNASConfig() remove error = %v", err)
	}

	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && !pollerHasProvider(poller, "conn-1")
	}, "expected removed connection provider to be pruned")

	// Stop before asserting on ingested state so it cannot change underfoot.
	poller.Stop()
	if hasTrueNASHostForOrg(poller, "default", "nas-one") {
		t.Fatal("expected first host resources to be removed after pruning provider")
	}
	if !hasTrueNASHostForOrg(poller, "default", "nas-two") {
		t.Fatal("expected second host resources to be ingested")
	}
}
|
|
|
|
func copyTrueNASSnapshot(snapshot *truenas.FixtureSnapshot) *truenas.FixtureSnapshot {
|
|
if snapshot == nil {
|
|
return nil
|
|
}
|
|
cloned := *snapshot
|
|
cloned.Pools = append([]truenas.Pool(nil), snapshot.Pools...)
|
|
cloned.Datasets = append([]truenas.Dataset(nil), snapshot.Datasets...)
|
|
cloned.Disks = append([]truenas.Disk(nil), snapshot.Disks...)
|
|
cloned.Alerts = append([]truenas.Alert(nil), snapshot.Alerts...)
|
|
cloned.ZFSSnapshots = append([]truenas.ZFSSnapshot(nil), snapshot.ZFSSnapshots...)
|
|
cloned.ReplicationTasks = append([]truenas.ReplicationTask(nil), snapshot.ReplicationTasks...)
|
|
if snapshot.System.TemperatureCelsius != nil {
|
|
cloned.System.TemperatureCelsius = make(map[string]float64, len(snapshot.System.TemperatureCelsius))
|
|
for key, value := range snapshot.System.TemperatureCelsius {
|
|
cloned.System.TemperatureCelsius[key] = value
|
|
}
|
|
}
|
|
if len(snapshot.Apps) > 0 {
|
|
cloned.Apps = make([]truenas.App, len(snapshot.Apps))
|
|
for i, app := range snapshot.Apps {
|
|
appCopy := app
|
|
appCopy.UsedHostIPs = append([]string(nil), app.UsedHostIPs...)
|
|
appCopy.UsedPorts = append([]truenas.AppPort(nil), app.UsedPorts...)
|
|
appCopy.Volumes = append([]truenas.AppVolume(nil), app.Volumes...)
|
|
appCopy.Images = append([]string(nil), app.Images...)
|
|
appCopy.Networks = append([]truenas.AppNetwork(nil), app.Networks...)
|
|
appCopy.Containers = append([]truenas.AppContainer(nil), app.Containers...)
|
|
if app.Stats != nil {
|
|
statsCopy := *app.Stats
|
|
statsCopy.Interfaces = append([]truenas.AppInterfaceStats(nil), app.Stats.Interfaces...)
|
|
appCopy.Stats = &statsCopy
|
|
}
|
|
cloned.Apps[i] = appCopy
|
|
}
|
|
}
|
|
return &cloned
|
|
}
|
|
|
|
// TestTrueNASPollerAPITimeout verifies that the poller keeps retrying while
// the TrueNAS API times out, and that it recovers and ingests resources once
// responses become fast again. The client timeout (75ms) is injected via
// injectTrueNASProviderTimeout so the 200ms delay below reliably trips it.
func TestTrueNASPollerAPITimeout(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	var requestCount atomic.Int64
	var injectDelay atomic.Bool
	injectDelay.Store(true)

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		requestCount.Add(1)
		w.Header().Set("Content-Type", "application/json")

		// While injectDelay is set, stall system/info past the 75ms client
		// timeout so every poll cycle fails with a timeout.
		if r.URL.Path == "/api/v2.0/system/info" && injectDelay.Load() {
			time.Sleep(200 * time.Millisecond)
		}

		switch r.URL.Path {
		case "/api/v2.0/system/info":
			_, _ = w.Write([]byte(`{"hostname":"timeout-host","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-timeout-host"}`))
		case "/api/v2.0/pool":
			_, _ = w.Write([]byte(`[{"id":1,"name":"timeout-pool","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
		case "/api/v2.0/pool/dataset":
			_, _ = w.Write([]byte(`[]`))
		case "/api/v2.0/disk":
			_, _ = w.Write([]byte(`[]`))
		case "/api/v2.0/alert/list":
			_, _ = w.Write([]byte(`[]`))
		default:
			http.NotFound(w, r)
		}
	}))
	t.Cleanup(server.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "timeout-conn", server.URL, true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	// Replace the default provider with one whose HTTP client times out at 75ms.
	injectTrueNASProviderTimeout(t, poller, connection, 75*time.Millisecond)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// Phase 1: requests keep arriving even though each poll times out.
	waitForCondition(t, 2*time.Second, func() bool {
		return requestCount.Load() >= 3
	}, "expected poller to continue retrying while API requests time out")

	// Phase 2: lift the delay and expect a full successful poll cycle
	// (5 endpoints) on top of whatever was already counted.
	injectDelay.Store(false)
	recoveryStart := requestCount.Load()

	waitForCondition(t, 3*time.Second, func() bool {
		return requestCount.Load() >= recoveryStart+5
	}, "expected at least one successful poll cycle after timeout clears")

	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "timeout-host") {
		t.Fatal("expected poller to recover and ingest TrueNAS resources after timeout clears")
	}
}
|
|
|
|
// TestTrueNASPollerAuthFailure verifies the poller keeps retrying (rather
// than shutting down) when every API request is rejected with 401, and that
// no resources are ingested from a connection that never authenticates.
func TestTrueNASPollerAuthFailure(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	var requestCount atomic.Int64
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		requestCount.Add(1)
		w.Header().Set("Content-Type", "application/json")
		// Every request fails authentication unconditionally.
		w.WriteHeader(http.StatusUnauthorized)
		_, _ = w.Write([]byte(`{"error":"unauthorized"}`))
	}))
	t.Cleanup(server.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "auth-failure-conn", server.URL, true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	waitForCondition(t, 2*time.Second, func() bool {
		return requestCount.Load() >= 2
	}, "expected at least two poll attempts with auth failures")

	// The request count must still be climbing after the failures above.
	before := requestCount.Load()
	waitForCondition(t, 2*time.Second, func() bool {
		return requestCount.Load() > before
	}, "expected poller to keep attempting after repeated auth failures")

	// Non-blocking probe: the stopped channel must still be open, i.e. the
	// poller did not give up because of the auth errors.
	select {
	case <-poller.stopped:
		t.Fatal("expected poller to keep running after auth failures")
	default:
	}

	poller.Stop()
	if hasTrueNASHostForOrg(poller, "default", "auth-failure-host") {
		t.Fatal("expected no resources to be ingested when every poll fails auth")
	}
}
|
|
|
|
// TestTrueNASPollerStaleDataRecovery simulates a success → outage → success
// sequence: the mock serves "stale-before" data for the first two poll
// attempts, returns 500s for the next three, then serves refreshed
// "stale-after" data. The test asserts the poller keeps attempting through
// the outage and eventually ingests the refreshed host.
func TestTrueNASPollerStaleDataRecovery(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	const (
		initialSuccessPolls = int64(2)
		failurePolls        = int64(3)
	)

	var pollAttempts atomic.Int64
	var initialSuccesses atomic.Int64
	var recoverySuccesses atomic.Int64
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		attempt := pollAttempts.Load()

		// system/info is the first call of each poll cycle, so it drives the
		// attempt counter and selects which phase the cycle falls into.
		if r.URL.Path == "/api/v2.0/system/info" {
			attempt = pollAttempts.Add(1)
			switch {
			case attempt <= initialSuccessPolls:
				_, _ = w.Write([]byte(`{"hostname":"stale-before","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-stale-before"}`))
			case attempt <= initialSuccessPolls+failurePolls:
				w.WriteHeader(http.StatusInternalServerError)
				_, _ = w.Write([]byte(`{"error":"simulated outage"}`))
			default:
				_, _ = w.Write([]byte(`{"hostname":"stale-after","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86500,"system_serial":"SER-stale-after"}`))
			}
			return
		}

		// Remaining endpoints for the initial (pre-outage) successful cycles;
		// alert/list is last in the cycle, so it marks a completed success.
		if attempt <= initialSuccessPolls {
			switch r.URL.Path {
			case "/api/v2.0/pool":
				_, _ = w.Write([]byte(`[{"id":1,"name":"before-pool","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
			case "/api/v2.0/pool/dataset":
				_, _ = w.Write([]byte(`[]`))
			case "/api/v2.0/disk":
				_, _ = w.Write([]byte(`[]`))
			case "/api/v2.0/alert/list":
				initialSuccesses.Add(1)
				_, _ = w.Write([]byte(`[]`))
			default:
				http.NotFound(w, r)
			}
			return
		}

		// Post-outage endpoints serving the refreshed dataset.
		switch r.URL.Path {
		case "/api/v2.0/pool":
			_, _ = w.Write([]byte(`[{"id":1,"name":"after-pool","status":"ONLINE","size":1000,"allocated":500,"free":500}]`))
		case "/api/v2.0/pool/dataset":
			_, _ = w.Write([]byte(`[]`))
		case "/api/v2.0/disk":
			_, _ = w.Write([]byte(`[]`))
		case "/api/v2.0/alert/list":
			recoverySuccesses.Add(1)
			_, _ = w.Write([]byte(`[]`))
		default:
			http.NotFound(w, r)
		}
	}))
	t.Cleanup(server.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "stale-recovery-conn", server.URL, true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	waitForCondition(t, 2*time.Second, func() bool {
		return initialSuccesses.Load() > 0
	}, "expected initial successful polls to ingest baseline resources")

	waitForCondition(t, 3*time.Second, func() bool {
		return pollAttempts.Load() >= initialSuccessPolls+failurePolls
	}, "expected poller to continue attempts throughout failure window")

	waitForCondition(t, 3*time.Second, func() bool {
		return recoverySuccesses.Load() > 0
	}, "expected poller to recover and ingest refreshed data after failures")

	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "stale-after") {
		t.Fatal("expected recovered TrueNAS host data to be ingested")
	}
}
|
|
|
|
// TestTrueNASPollerConnectionFlap simulates an endpoint that goes down
// (503s) mid-run and then comes back reporting a new hostname. The poller
// must keep polling throughout the outage and ingest the post-recovery data.
func TestTrueNASPollerConnectionFlap(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	var requestCount atomic.Int64
	var isDown atomic.Bool
	var recovered atomic.Bool
	var beforeDownSuccesses atomic.Int64
	var afterRecoverySuccesses atomic.Int64

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		requestCount.Add(1)
		w.Header().Set("Content-Type", "application/json")

		// During the simulated outage every request returns 503.
		if isDown.Load() {
			w.WriteHeader(http.StatusServiceUnavailable)
			_, _ = w.Write([]byte(`{"error":"temporarily unavailable"}`))
			return
		}

		// The reported hostname changes after recovery so ingestion of the
		// post-flap data is observable.
		hostname := "flap-before"
		if recovered.Load() {
			hostname = "flap-after"
		}

		switch r.URL.Path {
		case "/api/v2.0/system/info":
			_, _ = w.Write([]byte(`{"hostname":"` + hostname + `","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-` + hostname + `"}`))
		case "/api/v2.0/pool":
			_, _ = w.Write([]byte(`[{"id":1,"name":"flap-pool","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
		case "/api/v2.0/pool/dataset":
			_, _ = w.Write([]byte(`[]`))
		case "/api/v2.0/disk":
			_, _ = w.Write([]byte(`[]`))
		case "/api/v2.0/alert/list":
			// alert/list is the final call of a cycle, so it marks a
			// completed successful poll for the current phase.
			if recovered.Load() {
				afterRecoverySuccesses.Add(1)
			} else {
				beforeDownSuccesses.Add(1)
			}
			_, _ = w.Write([]byte(`[]`))
		default:
			http.NotFound(w, r)
		}
	}))
	t.Cleanup(server.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "connection-flap-conn", server.URL, true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	waitForCondition(t, 2*time.Second, func() bool {
		return beforeDownSuccesses.Load() > 0
	}, "expected initial TrueNAS ingest before simulated outage")

	// Begin the outage and require at least three more requests while down.
	isDown.Store(true)
	startedDownAt := requestCount.Load()
	waitForCondition(t, 2*time.Second, func() bool {
		return requestCount.Load() >= startedDownAt+3
	}, "expected poller to continue making requests while endpoint is down")

	recovered.Store(true)
	isDown.Store(false)

	waitForCondition(t, 3*time.Second, func() bool {
		return afterRecoverySuccesses.Load() > 0
	}, "expected poller to recover ingestion after endpoint returns")

	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "flap-after") {
		t.Fatal("expected recovered endpoint data to be ingested")
	}
}
|
|
|
|
// TestTrueNASPollerConcurrentConfigChange verifies the poller picks up
// connection additions and removals saved to persistence while it is
// running: one connection → two connections → only the second connection.
func TestTrueNASPollerConcurrentConfigChange(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	first := newTrueNASMockServer(t, "config-change-one")
	second := newTrueNASMockServer(t, "config-change-two")
	t.Cleanup(first.Close)
	t.Cleanup(second.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connOne := trueNASInstanceForServer(t, "config-change-1", first.URL(), true)
	connTwo := trueNASInstanceForServer(t, "config-change-2", second.URL(), true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connOne}); err != nil {
		t.Fatalf("SaveTrueNASConfig() initial error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// 5 requests ≙ one full poll cycle against the mock's five endpoints.
	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && pollerHasProvider(poller, connOne.ID) && first.RequestCount() >= 5
	}, "expected first connection to be active before config updates")

	// Add the second connection while the poller is live.
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connOne, connTwo}); err != nil {
		t.Fatalf("SaveTrueNASConfig() add error = %v", err)
	}

	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 2 && pollerHasProvider(poller, connOne.ID) && pollerHasProvider(poller, connTwo.ID) && second.RequestCount() >= 5
	}, "expected second connection to appear while poller is running")

	// Remove the first connection; the provider map must converge to one.
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connTwo}); err != nil {
		t.Fatalf("SaveTrueNASConfig() remove error = %v", err)
	}

	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && !pollerHasProvider(poller, connOne.ID) && pollerHasProvider(poller, connTwo.ID)
	}, "expected provider map to converge after removing first connection")

	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "config-change-two") {
		t.Fatal("expected second connection resources to be ingested")
	}
}
|
|
|
|
// TestTrueNASPollerRebindsProviderWhenConnectionConfigChanges verifies that
// updating an existing connection's endpoint (same ID, new URL) rebinds the
// provider: polling moves to the new server, the old server stops receiving
// requests, and the cached host data is replaced.
func TestTrueNASPollerRebindsProviderWhenConnectionConfigChanges(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	first := newTrueNASMockServer(t, "config-rebind-one")
	second := newTrueNASMockServer(t, "config-rebind-two")
	t.Cleanup(first.Close)
	t.Cleanup(second.Close)

	mtp, persistence := newTestTenantPersistence(t)
	connection := trueNASInstanceForServer(t, "config-rebind", first.URL(), true)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() initial error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// 5 requests ≙ one full poll cycle against the mock's five endpoints.
	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && first.RequestCount() >= 5
	}, "expected initial TrueNAS connection to poll before config change")

	// Same connection ID and name, but pointing at the second server.
	updated := trueNASInstanceForServer(t, connection.ID, second.URL(), true)
	updated.Name = connection.Name
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{updated}); err != nil {
		t.Fatalf("SaveTrueNASConfig() updated error = %v", err)
	}

	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && second.RequestCount() >= 5
	}, "expected provider to rebind to updated TrueNAS endpoint")

	// Negative check: the old endpoint's request count must stay flat for a
	// 200ms quiet window (several 50ms poll intervals).
	firstCountAfterRebind := first.RequestCount()
	noPollDeadline := time.Now().Add(200 * time.Millisecond)
	waitForCondition(t, 500*time.Millisecond, func() bool {
		return time.Now().After(noPollDeadline) && first.RequestCount() == firstCountAfterRebind
	}, "expected replaced TrueNAS endpoint to stop receiving poll requests")

	poller.Stop()
	if hasTrueNASHostForOrg(poller, "default", "config-rebind-one") {
		t.Fatal("expected old TrueNAS host to be replaced after config rebind")
	}
	if !hasTrueNASHostForOrg(poller, "default", "config-rebind-two") {
		t.Fatal("expected updated TrueNAS host to be ingested after config rebind")
	}
}
|
|
|
|
// TestTrueNASPollerSkipsDisabledConnections verifies that a connection saved
// with Enabled=false gets no provider, receives zero requests, and
// contributes no cached resources, while an enabled sibling polls normally.
func TestTrueNASPollerSkipsDisabledConnections(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	enabled := newTrueNASMockServer(t, "nas-enabled")
	disabled := newTrueNASMockServer(t, "nas-disabled")
	t.Cleanup(enabled.Close)
	t.Cleanup(disabled.Close)

	mtp, persistence := newTestTenantPersistence(t)
	enabledConn := trueNASInstanceForServer(t, "conn-enabled", enabled.URL(), true)
	disabledConn := trueNASInstanceForServer(t, "conn-disabled", disabled.URL(), false)
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{enabledConn, disabledConn}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// Exactly one provider (the enabled one) and at least a full poll cycle
	// (5 endpoint requests) against the enabled mock.
	waitForCondition(t, 2*time.Second, func() bool {
		return pollerProviderCount(poller) == 1 && enabled.RequestCount() >= 5
	}, "expected only enabled connection provider and resources")

	// Wait for a second cycle so the disabled mock has had ample time to
	// receive traffic if it was ever going to.
	waitForCondition(t, 2*time.Second, func() bool {
		return enabled.RequestCount() >= 10
	}, "expected additional polling cycles for enabled connection")

	if disabled.RequestCount() != 0 {
		t.Fatalf("expected disabled connection to be skipped, got %d requests", disabled.RequestCount())
	}
	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "nas-enabled") {
		t.Fatal("expected enabled connection host to be present in cached records")
	}
	if hasTrueNASHostForOrg(poller, "default", "nas-disabled") {
		t.Fatal("expected disabled connection host to be absent from cached records")
	}
}
|
|
|
|
// TestTrueNASPollerCachesRecordsPerOrganization verifies tenant isolation:
// connections saved under "default" and "org-a" are both polled, but each
// org's cached records contain only its own hosts.
func TestTrueNASPollerCachesRecordsPerOrganization(t *testing.T) {
	previous := truenas.IsFeatureEnabled()
	truenas.SetFeatureEnabled(true)
	t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })

	defaultOrgMock := newTrueNASMockServer(t, "default-nas")
	tenantMock := newTrueNASMockServer(t, "tenant-nas")
	t.Cleanup(defaultOrgMock.Close)
	t.Cleanup(tenantMock.Close)

	// Default org gets its own connection...
	mtp, defaultPersistence := newTestTenantPersistence(t)
	defaultConn := trueNASInstanceForServer(t, "default-conn", defaultOrgMock.URL(), true)
	if err := defaultPersistence.SaveTrueNASConfig([]config.TrueNASInstance{defaultConn}); err != nil {
		t.Fatalf("SaveTrueNASConfig(default) error = %v", err)
	}

	// ...and org-a gets a separate one under its own persistence.
	tenantPersistence, err := mtp.GetPersistence("org-a")
	if err != nil {
		t.Fatalf("GetPersistence(org-a) error = %v", err)
	}
	tenantConn := trueNASInstanceForServer(t, "tenant-conn", tenantMock.URL(), true)
	if err := tenantPersistence.SaveTrueNASConfig([]config.TrueNASInstance{tenantConn}); err != nil {
		t.Fatalf("SaveTrueNASConfig(org-a) error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
	poller.Start(context.Background())
	t.Cleanup(poller.Stop)

	// 5 requests ≙ one full poll cycle against each mock's five endpoints.
	waitForCondition(t, 2*time.Second, func() bool {
		return defaultOrgMock.RequestCount() >= 5 &&
			tenantMock.RequestCount() >= 5 &&
			hasTrueNASHostForOrg(poller, "default", "default-nas") &&
			hasTrueNASHostForOrg(poller, "org-a", "tenant-nas")
	}, "expected polling for both default and org-a TrueNAS connections and cached records for each org")

	poller.Stop()
	if !hasTrueNASHostForOrg(poller, "default", "default-nas") {
		t.Fatal("expected default org records to include default host")
	}
	// Cross-tenant leak check: tenant host must not appear under "default".
	if hasTrueNASHostForOrg(poller, "default", "tenant-nas") {
		t.Fatal("expected default org records to exclude tenant host")
	}
	if !hasTrueNASHostForOrg(poller, "org-a", "tenant-nas") {
		t.Fatal("expected tenant records to include tenant host")
	}
}
|
|
|
|
func TestTrueNASPollerStopsCleanly(t *testing.T) {
|
|
previous := truenas.IsFeatureEnabled()
|
|
truenas.SetFeatureEnabled(true)
|
|
t.Cleanup(func() { truenas.SetFeatureEnabled(previous) })
|
|
|
|
mtp, _ := newTestTenantPersistence(t)
|
|
poller := NewTrueNASPoller(mtp, 50*time.Millisecond, nil)
|
|
poller.Start(context.Background())
|
|
poller.Stop()
|
|
|
|
select {
|
|
case <-poller.stopped:
|
|
case <-time.After(time.Second):
|
|
t.Fatal("expected poller stopped channel to close")
|
|
}
|
|
}
|
|
|
|
func TestTrueNASPollerSnapshotOwnedSources(t *testing.T) {
|
|
poller := NewTrueNASPoller(nil, time.Second, nil)
|
|
|
|
defaultSources := poller.SnapshotOwnedSources()
|
|
if len(defaultSources) != 1 || defaultSources[0] != unifiedresources.SourceTrueNAS {
|
|
t.Fatalf("default owned sources = %#v, want [%q]", defaultSources, unifiedresources.SourceTrueNAS)
|
|
}
|
|
|
|
orgSources := poller.SnapshotOwnedSourcesForOrg("org-a")
|
|
if len(orgSources) != 1 || orgSources[0] != unifiedresources.SourceTrueNAS {
|
|
t.Fatalf("org owned sources = %#v, want [%q]", orgSources, unifiedresources.SourceTrueNAS)
|
|
}
|
|
}
|
|
|
|
// TestTrueNASPollerSupplementalInventoryReadyAtUsesPersistedActiveConnections
// verifies that readiness stays unsettled (zero time) while a persisted
// connection has no recorded attempt, and settles to the attempt timestamp
// once a connection test success is recorded.
func TestTrueNASPollerSupplementalInventoryReadyAtUsesPersistedActiveConnections(t *testing.T) {
	mtp, persistence := newTestTenantPersistence(t)

	connection := config.NewTrueNASInstance()
	connection.ID = "conn-ready"
	connection.Host = "nas-ready.lab.local"
	connection.APIKey = "api-key"
	if err := persistence.SaveTrueNASConfig([]config.TrueNASInstance{connection}); err != nil {
		t.Fatalf("SaveTrueNASConfig() error = %v", err)
	}

	poller := NewTrueNASPoller(mtp, time.Second, nil)

	// No attempt recorded yet: readiness must be unsettled and zero-valued.
	if readyAt, settled := poller.SupplementalInventoryReadyAt(nil, "default"); settled || !readyAt.IsZero() {
		t.Fatalf("SupplementalInventoryReadyAt() before any attempt = (%v, %t), want (zero, false)", readyAt, settled)
	}

	attemptedAt := time.Now().UTC()
	poller.RecordConnectionTestSuccess("default", connection.ID, connection, attemptedAt)

	readyAt, settled := poller.SupplementalInventoryReadyAt(nil, "default")
	if !settled {
		t.Fatal("expected readiness to settle after the first recorded attempt")
	}
	// The settled timestamp must be exactly the recorded attempt time.
	if !readyAt.Equal(attemptedAt) {
		t.Fatalf("SupplementalInventoryReadyAt() = %v, want %v", readyAt, attemptedAt)
	}
}
|
|
|
|
func TestTrueNASPollerSyncConnectionsLogsStructuredContextWhenPersistenceNil(t *testing.T) {
|
|
logOutput := captureTrueNASPollerLogs(t)
|
|
|
|
poller := NewTrueNASPoller(nil, time.Second, nil)
|
|
poller.syncConnections()
|
|
|
|
for _, expected := range []string{
|
|
`"level":"warn"`,
|
|
`"component":"truenas_poller"`,
|
|
`"action":"sync_connections"`,
|
|
`"message":"TrueNAS poller cannot sync connections because multi-tenant persistence is nil"`,
|
|
} {
|
|
if !strings.Contains(logOutput.String(), expected) {
|
|
t.Fatalf("expected log output to include %s, got %q", expected, logOutput.String())
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestTrueNASPollerPollAllLogsStructuredContextOnRefreshFailure seeds the
// poller with a provider whose fetcher always fails, runs one pollAll pass,
// and asserts the refresh failure is logged with structured context fields.
// The poller's internal maps are populated directly under its lock because
// there is no persistence backing this instance.
func TestTrueNASPollerPollAllLogsStructuredContextOnRefreshFailure(t *testing.T) {
	logOutput := captureTrueNASPollerLogs(t)

	poller := NewTrueNASPoller(nil, time.Second, nil)
	poller.mu.Lock()
	// Lazily create the per-org config map and register one connection.
	if poller.configsByOrg == nil {
		poller.configsByOrg = make(map[string]map[string]config.TrueNASInstance)
	}
	if poller.configsByOrg["default"] == nil {
		poller.configsByOrg["default"] = make(map[string]config.TrueNASInstance)
	}
	connection := config.NewTrueNASInstance()
	connection.ID = "conn-refresh-fail"
	connection.Host = "nas-refresh-fail.lab.local"
	connection.APIKey = "api-key"
	poller.configsByOrg["default"][connection.ID] = connection
	// Pair the connection with a provider backed by an always-failing fetcher.
	if poller.providersByOrg == nil {
		poller.providersByOrg = make(map[string]map[string]*truenas.Provider)
	}
	if poller.providersByOrg["default"] == nil {
		poller.providersByOrg["default"] = make(map[string]*truenas.Provider)
	}
	poller.providersByOrg["default"][connection.ID] = truenas.NewLiveProvider(failingTrueNASFetcher{err: fmt.Errorf("refresh exploded")})
	poller.mu.Unlock()

	poller.pollAll(context.Background())

	// The warning must carry the structured fields and the wrapped error text.
	for _, expected := range []string{
		`"level":"warn"`,
		`"component":"truenas_poller"`,
		`"action":"refresh_connection"`,
		`"connection_id":"conn-refresh-fail"`,
		`"error":"refresh truenas snapshot: refresh exploded"`,
		`"message":"TrueNAS poller refresh failed"`,
	} {
		if !strings.Contains(logOutput.String(), expected) {
			t.Fatalf("expected log output to include %s, got %q", expected, logOutput.String())
		}
	}
}
|
|
|
|
// TestClassifyTrueNASError is a table-driven test covering how
// classifyTrueNASError maps errors to monitor error types and retryability:
// 401/403 → non-retryable "auth"; 408/504 and deadline-exceeded → retryable
// "timeout"; network errors → retryable "connection"; everything else
// (including plain errors) → retryable "api". Wrapped errors must classify
// the same as their unwrapped cause.
func TestClassifyTrueNASError(t *testing.T) {
	tests := []struct {
		name          string
		err           error
		expectedType  string
		expectedRetry bool
	}{
		{
			name:         "nil error returns nil",
			err:          nil,
			expectedType: "",
		},
		{
			name:          "APIError 401 classifies as auth",
			err:           &truenas.APIError{StatusCode: 401, Method: "GET", Path: "/system/info", Body: "Unauthorized"},
			expectedType:  "auth",
			expectedRetry: false,
		},
		{
			name:          "APIError 403 classifies as auth",
			err:           &truenas.APIError{StatusCode: 403, Method: "GET", Path: "/pool", Body: "Forbidden"},
			expectedType:  "auth",
			expectedRetry: false,
		},
		{
			name:          "APIError 500 classifies as api",
			err:           &truenas.APIError{StatusCode: 500, Method: "GET", Path: "/pool", Body: "Internal Server Error"},
			expectedType:  "api",
			expectedRetry: true,
		},
		{
			name:          "APIError 408 classifies as timeout",
			err:           &truenas.APIError{StatusCode: 408, Method: "GET", Path: "/system/info", Body: "Request Timeout"},
			expectedType:  "timeout",
			expectedRetry: true,
		},
		{
			name:          "APIError 504 classifies as timeout",
			err:           &truenas.APIError{StatusCode: 504, Method: "GET", Path: "/pool", Body: "Gateway Timeout"},
			expectedType:  "timeout",
			expectedRetry: true,
		},
		{
			name:          "wrapped APIError 401 classifies as auth",
			err:           fmt.Errorf("fetch truenas system info: %w", &truenas.APIError{StatusCode: 401, Method: "GET", Path: "/system/info", Body: "Unauthorized"}),
			expectedType:  "auth",
			expectedRetry: false,
		},
		{
			name:          "context.DeadlineExceeded classifies as timeout",
			err:           context.DeadlineExceeded,
			expectedType:  "timeout",
			expectedRetry: true,
		},
		{
			name:          "wrapped context.DeadlineExceeded classifies as timeout",
			err:           fmt.Errorf("fetch truenas system info: %w", context.DeadlineExceeded),
			expectedType:  "timeout",
			expectedRetry: true,
		},
		{
			name:          "url.Error with timeout classifies as timeout",
			err:           &url.Error{Op: "Get", URL: "https://truenas.local/api/v2.0/system/info", Err: context.DeadlineExceeded},
			expectedType:  "timeout",
			expectedRetry: true,
		},
		{
			name:          "net.OpError classifies as connection",
			err:           &net.OpError{Op: "dial", Net: "tcp", Addr: nil, Err: fmt.Errorf("connection refused")},
			expectedType:  "connection",
			expectedRetry: true,
		},
		{
			name:          "wrapped net.OpError classifies as connection",
			err:           fmt.Errorf("truenas request GET /system/info failed: %w", &net.OpError{Op: "dial", Net: "tcp", Addr: nil, Err: fmt.Errorf("connection refused")}),
			expectedType:  "connection",
			expectedRetry: true,
		},
		{
			name:          "plain error classifies as api fallback",
			err:           fmt.Errorf("some unknown error"),
			expectedType:  "api",
			expectedRetry: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := classifyTrueNASError(tt.err, "test-conn")

			// nil input must yield nil output with no classification at all.
			if tt.err == nil {
				if result != nil {
					t.Fatalf("expected nil, got %+v", result)
				}
				return
			}

			if result == nil {
				t.Fatal("expected non-nil MonitorError")
			}

			if string(result.Type) != tt.expectedType {
				t.Errorf("expected type %q, got %q", tt.expectedType, result.Type)
			}
			if result.Retryable != tt.expectedRetry {
				t.Errorf("expected retryable=%v, got %v", tt.expectedRetry, result.Retryable)
			}
			// Instance and Op are fixed metadata regardless of error kind.
			if result.Instance != "test-conn" {
				t.Errorf("expected instance %q, got %q", "test-conn", result.Instance)
			}
			if result.Op != "truenas_poll" {
				t.Errorf("expected op %q, got %q", "truenas_poll", result.Op)
			}
		})
	}
}
|
|
|
|
// trueNASMockServer wraps an httptest server that serves canned TrueNAS API
// responses and counts every request it receives.
type trueNASMockServer struct {
	// server is the underlying httptest instance serving canned responses.
	server *httptest.Server
	// requests is incremented once per handled HTTP request.
	requests atomic.Int64
}
|
|
|
|
// newTrueNASMockServer starts an httptest server that emulates the five
// TrueNAS v2.0 API endpoints the poller hits (system/info, pool,
// pool/dataset, disk, alert/list), deriving all payloads from hostname so
// each mock's data is distinguishable. Every request bumps the counter read
// via RequestCount. The caller is responsible for Close.
func newTrueNASMockServer(t *testing.T, hostname string) *trueNASMockServer {
	t.Helper()

	mock := &trueNASMockServer{}
	// Pool name is derived from the hostname so tests can tell mocks apart.
	poolName := "pool-" + hostname

	mock.server = httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
		mock.requests.Add(1)
		writer.Header().Set("Content-Type", "application/json")

		switch request.URL.Path {
		case "/api/v2.0/system/info":
			_, _ = writer.Write([]byte(`{"hostname":"` + hostname + `","version":"TrueNAS-SCALE-24.10.2","buildtime":"24.10.2.1","uptime_seconds":86400,"system_serial":"SER-` + hostname + `"}`))
		case "/api/v2.0/pool":
			_, _ = writer.Write([]byte(`[{"id":1,"name":"` + poolName + `","status":"ONLINE","size":1000,"allocated":400,"free":600}]`))
		case "/api/v2.0/pool/dataset":
			_, _ = writer.Write([]byte(`[{"id":"` + poolName + `/apps","name":"` + poolName + `/apps","pool":"` + poolName + `","used":{"rawvalue":"12345","parsed":12345},"available":{"rawvalue":"555","parsed":555},"mountpoint":"/mnt/` + poolName + `/apps","readonly":{"rawvalue":"off","parsed":false},"mounted":true}]`))
		case "/api/v2.0/disk":
			_, _ = writer.Write([]byte(`[{"identifier":"{disk-1}","name":"sda","serial":"SER-A","size":1000000,"model":"Seagate","type":"HDD","pool":"` + poolName + `","bus":"SATA","rotationrate":7200,"status":"ONLINE"}]`))
		case "/api/v2.0/alert/list":
			_, _ = writer.Write([]byte(`[{"id":"a1","level":"WARNING","formatted":"Disk temp high","source":"DiskService","dismissed":false,"datetime":{"$date":1707400000000}}]`))
		default:
			http.NotFound(writer, request)
		}
	}))

	return mock
}
|
|
|
|
func (m *trueNASMockServer) URL() string {
|
|
return m.server.URL
|
|
}
|
|
|
|
func (m *trueNASMockServer) Close() {
|
|
if m != nil && m.server != nil {
|
|
m.server.Close()
|
|
}
|
|
}
|
|
|
|
func (m *trueNASMockServer) RequestCount() int64 {
|
|
if m == nil {
|
|
return 0
|
|
}
|
|
return m.requests.Load()
|
|
}
|
|
|
|
func trueNASInstanceForServer(t *testing.T, id string, rawURL string, enabled bool) config.TrueNASInstance {
|
|
t.Helper()
|
|
|
|
parsed, err := url.Parse(rawURL)
|
|
if err != nil {
|
|
t.Fatalf("url.Parse(%q) error = %v", rawURL, err)
|
|
}
|
|
port, err := strconv.Atoi(parsed.Port())
|
|
if err != nil {
|
|
t.Fatalf("parse port from %q error = %v", rawURL, err)
|
|
}
|
|
|
|
return config.TrueNASInstance{
|
|
ID: id,
|
|
Name: "connection-" + id,
|
|
Host: parsed.Hostname(),
|
|
Port: port,
|
|
APIKey: "test-api-key",
|
|
UseHTTPS: strings.EqualFold(parsed.Scheme, "https"),
|
|
Enabled: enabled,
|
|
}
|
|
}
|
|
|
|
// waitForCondition polls condition every 10ms until it returns true or
// timeout elapses, failing the test with failureMessage on expiry. The
// condition is evaluated one final time after the deadline passes so a state
// change that lands between the last sleep and the deadline does not produce
// a flaky failure.
func waitForCondition(t *testing.T, timeout time.Duration, condition func() bool, failureMessage string) {
	t.Helper()

	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if condition() {
			return
		}
		time.Sleep(10 * time.Millisecond)
	}
	// Final check: the condition may have become true while the loop slept
	// across the deadline boundary.
	if condition() {
		return
	}
	t.Fatal(failureMessage)
}
|
|
|
|
func newTestTenantPersistence(t *testing.T) (*config.MultiTenantPersistence, *config.ConfigPersistence) {
|
|
t.Helper()
|
|
|
|
mtp := config.NewMultiTenantPersistence(t.TempDir())
|
|
persistence, err := mtp.GetPersistence("default")
|
|
if err != nil {
|
|
t.Fatalf("GetPersistence(default) error = %v", err)
|
|
}
|
|
return mtp, persistence
|
|
}
|
|
|
|
func hasTrueNASHostForOrg(poller *TrueNASPoller, orgID, hostname string) bool {
|
|
if poller == nil {
|
|
return false
|
|
}
|
|
|
|
records := poller.GetCurrentRecordsForOrg(orgID)
|
|
if len(records) == 0 {
|
|
return false
|
|
}
|
|
|
|
registry := unifiedresources.NewRegistry(nil)
|
|
registry.IngestRecords(unifiedresources.SourceTrueNAS, records)
|
|
return hasTrueNASHost(registry, hostname)
|
|
}
|
|
|
|
func hasTrueNASHost(registry *unifiedresources.ResourceRegistry, hostname string) bool {
|
|
if registry == nil {
|
|
return false
|
|
}
|
|
|
|
resources := registry.List()
|
|
for _, resource := range resources {
|
|
if resource.Type != unifiedresources.ResourceTypeAgent || resource.Name != hostname {
|
|
continue
|
|
}
|
|
if resourceHasSource(resource, unifiedresources.SourceTrueNAS) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func resourceHasSource(resource unifiedresources.Resource, source unifiedresources.DataSource) bool {
|
|
for _, candidate := range resource.Sources {
|
|
if candidate == source {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func pollerProviderCount(poller *TrueNASPoller) int {
|
|
if poller == nil {
|
|
return 0
|
|
}
|
|
poller.mu.Lock()
|
|
defer poller.mu.Unlock()
|
|
total := 0
|
|
for _, providers := range poller.providersByOrg {
|
|
total += len(providers)
|
|
}
|
|
return total
|
|
}
|
|
|
|
func pollerHasProvider(poller *TrueNASPoller, id string) bool {
|
|
if poller == nil {
|
|
return false
|
|
}
|
|
poller.mu.Lock()
|
|
defer poller.mu.Unlock()
|
|
providers := poller.providersByOrg["default"]
|
|
if providers == nil {
|
|
return false
|
|
}
|
|
_, ok := providers[id]
|
|
return ok
|
|
}
|
|
|
|
// injectTrueNASProviderTimeout replaces the default-org provider for
// instance.ID with a live provider whose HTTP client uses the given timeout,
// so tests can force API calls to time out deterministically. The poller's
// lock is held while the provider map is mutated.
func injectTrueNASProviderTimeout(t *testing.T, poller *TrueNASPoller, instance config.TrueNASInstance, timeout time.Duration) {
	t.Helper()

	// Build a client identical to the instance's normal configuration except
	// for the caller-supplied timeout.
	client, err := truenas.NewClient(truenas.ClientConfig{
		Host:               instance.Host,
		Port:               instance.Port,
		APIKey:             instance.APIKey,
		Username:           instance.Username,
		Password:           instance.Password,
		UseHTTPS:           instance.UseHTTPS,
		InsecureSkipVerify: instance.InsecureSkipVerify,
		Fingerprint:        instance.Fingerprint,
		Timeout:            timeout,
	})
	if err != nil {
		t.Fatalf("NewClient() error = %v", err)
	}

	poller.mu.Lock()
	defer poller.mu.Unlock()
	// Lazily create the nested maps before installing the provider.
	if poller.providersByOrg == nil {
		poller.providersByOrg = make(map[string]map[string]*truenas.Provider)
	}
	if poller.providersByOrg["default"] == nil {
		poller.providersByOrg["default"] = make(map[string]*truenas.Provider)
	}
	poller.providersByOrg["default"][instance.ID] = truenas.NewLiveProvider(&truenas.APIFetcher{Client: client})
}
|
|
|
|
func captureTrueNASPollerLogs(t *testing.T) *bytes.Buffer {
|
|
t.Helper()
|
|
|
|
var buf bytes.Buffer
|
|
origLogger := log.Logger
|
|
log.Logger = zerolog.New(&buf).Level(zerolog.DebugLevel).With().Timestamp().Logger()
|
|
t.Cleanup(func() {
|
|
log.Logger = origLogger
|
|
})
|
|
|
|
return &buf
|
|
}
|
|
|
|
// failingTrueNASFetcher is a fetcher stub whose Fetch always returns the
// configured error, used to exercise the poller's refresh-failure logging.
type failingTrueNASFetcher struct {
	// err is returned verbatim from every Fetch call.
	err error
}
|
|
|
|
// Fetch always fails with the stub's configured error and never yields a
// snapshot.
func (f failingTrueNASFetcher) Fetch(context.Context) (*truenas.FixtureSnapshot, error) {
	return nil, f.err
}
|