diff --git a/docs/release-control/v6/internal/subsystems/agent-lifecycle.md b/docs/release-control/v6/internal/subsystems/agent-lifecycle.md index ea314795d..ee051fd9a 100644 --- a/docs/release-control/v6/internal/subsystems/agent-lifecycle.md +++ b/docs/release-control/v6/internal/subsystems/agent-lifecycle.md @@ -1927,8 +1927,10 @@ config `signature` backward-compatible with installed agents by signing the legacy canonical payload shape only; newer clients validate `desiredConfig` by recomputing it from the signed command decision and signed settings payload, restricted to the agent-applied settings key schema. Broader applied-state -reporting and connections-ledger rollout presentation remain outside this -backend foundation. +reporting remains the next contract gap: until the runtime report carries a +comparable applied config fingerprint, `/api/connections` must surface desired +config metadata as pending or unknown and must not claim rollout convergence +from host report fields such as `commandsEnabled` or `diskExclude`. That same canonical /api/auto-register path must also complete the live post-registration contract after persistence: it must trigger discovery refresh and emit the canonical `node_auto_registered` WebSocket payload instead of diff --git a/docs/release-control/v6/internal/subsystems/api-contracts.md b/docs/release-control/v6/internal/subsystems/api-contracts.md index 82cfce651..c1708b1d5 100644 --- a/docs/release-control/v6/internal/subsystems/api-contracts.md +++ b/docs/release-control/v6/internal/subsystems/api-contracts.md @@ -868,6 +868,13 @@ the canonical monitored-system blocked payload. kernel, architecture, and command capability, so settings surfaces can render recognizable standalone-host identity without a second inventory fetch or frontend-local host reconciliation rules. + Agent config drift on that same payload must source desired fingerprints + from `Monitor.GetHostAgentConfig(...).DesiredConfig` or the same + `remoteconfig.BuildDesiredConfigMetadata` path. The aggregator must not + manufacture convergence by assigning desired and applied to the same local + report-field fingerprint; when host state lacks a trustworthy applied + config fingerprint, `configDrift` stays pending or unknown and rollout + stays non-current. Appliance-specific Pulse Agent compatibility is an additive host-profile fact on that same identity payload. For Unraid and similar host profiles, `agentIdentity.platform` remains the canonical runtime platform such as diff --git a/docs/release-control/v6/internal/subsystems/storage-recovery.md b/docs/release-control/v6/internal/subsystems/storage-recovery.md index 019e043ed..f01df5822 100644 --- a/docs/release-control/v6/internal/subsystems/storage-recovery.md +++ b/docs/release-control/v6/internal/subsystems/storage-recovery.md @@ -583,7 +583,10 @@ bypass the API fail-closed execution gate. but must not fork their own version-comparison semantics, desired/applied config-drift classifier, rollout-state classifier, credential-health classifier, command-policy vocabulary, or another agent lifecycle - vocabulary. + vocabulary. If `/api/connections` reports agent config drift as pending or + unknown because no trustworthy applied fingerprint exists, storage and + recovery must preserve that uncertainty instead of translating it into a + storage-local current/drifted verdict. 22. Keep backend-native platform actions on the adjacent AI/runtime and platform contracts. When `internal/api/` wires native TrueNAS app control for Assistant, storage and recovery may consume the refreshed recovery points afterward, but they must not grow a parallel recovery-local action transport or action-specific payload shape. 23. Keep backend-native platform diagnostics on the adjacent AI/runtime and platform contracts. When `internal/api/` wires native TrueNAS app log reads for Assistant, storage and recovery may use those diagnostics during investigation, but they must not grow a parallel recovery-local log transport or diagnostic payload shape. 24. Keep backend-native platform configuration reads on the adjacent AI/runtime and platform contracts. When `internal/api/` wires native TrueNAS app config for Assistant, storage and recovery may use that runtime shape during investigation, but they must not grow a parallel recovery-local config transport or provider-shaped configuration payload. diff --git a/frontend-modern/src/api/__tests__/connections.test.ts b/frontend-modern/src/api/__tests__/connections.test.ts index a3aa1aa7d..74d4ea8d1 100644 --- a/frontend-modern/src/api/__tests__/connections.test.ts +++ b/frontend-modern/src/api/__tests__/connections.test.ts @@ -194,6 +194,60 @@ describe('ConnectionsAPI', () => { }); }); + it('list() preserves pending agent config drift without an applied fingerprint', async () => { + const connections: Connection[] = [ + { + id: 'agent:mini-pc', + type: 'agent', + name: 'mini-pc', + address: 'mini-pc', + state: 'active', + stateReason: '', + enabled: true, + surfaces: ['host'], + scope: { host: true }, + lastSeen: '2026-04-22T20:00:00Z', + lastError: null, + source: 'agent', + fleet: { + enrollmentState: 'enrolled', + livenessState: 'active', + versionDrift: 'current', + adapterHealth: 'healthy', + configRollout: 'reported', + credentialStatus: 'verified', + updateStatus: 'current', + remoteControl: 'enabled', + configDrift: { + status: 'pending', + desired: { version: 'host-agent-config/v1', hash: 'sha256:desired' }, + reason: 'Pulse has not received a comparable applied agent configuration fingerprint yet', + }, + rollout: { + status: 'pending', + stage: 'pending', + reason: 'waiting for the agent to report an applied configuration fingerprint', + }, + }, + capabilities: { supportsPause: false, supportsScope: false, supportsTest: false }, + }, + ]; + mockedApiFetchJSON.mockResolvedValueOnce({ connections }); + + const result = await ConnectionsAPI.list(); + + expect(result.connections[0]?.fleet?.configDrift).toEqual({ + status: 'pending', + desired: { version: 'host-agent-config/v1', hash: 'sha256:desired' }, + reason: 'Pulse has not received a comparable applied agent configuration fingerprint yet', + }); + expect(result.connections[0]?.fleet?.rollout).toEqual({ + status: 'pending', + stage: 'pending', + reason: 'waiting for the agent to report an applied configuration fingerprint', + }); + }); + it('list() preserves agent identity metadata on agent-backed connections', async () => { const connections: Connection[] = [ { diff --git a/internal/api/connections_aggregator.go b/internal/api/connections_aggregator.go index 304d42055..dc1318292 100644 --- a/internal/api/connections_aggregator.go +++ b/internal/api/connections_aggregator.go @@ -94,6 +94,7 @@ type aggregatorInputs struct { availabilityTargets []config.AvailabilityTarget availabilityStatuses map[string]monitoring.AvailabilityProbeStatus hosts []models.Host + agentDesiredConfigs map[string]ConnectionFleetConfigFingerprint instanceHealth map[string]monitoring.InstanceHealth expectedAgentVersion string now time.Time @@ -131,7 +132,8 @@ func buildConnections(in aggregatorInputs) []Connection { out = append(out, buildAvailabilityConnection(target, in.availabilityStatuses[target.ID], now)) } for _, host := range in.hosts { - out = append(out, buildAgentConnection(host, in.expectedAgentVersion, now)) + desiredConfig := connectionAgentConfigFingerprintForHost(in.agentDesiredConfigs, host.ID) + out = append(out, buildAgentConnection(host, in.expectedAgentVersion, now, desiredConfig)) } sort.Slice(out, func(i, j int) bool { @@ -341,7 +343,7 @@ func buildAvailabilityConnection(target config.AvailabilityTarget, status monito // buildAgentConnection derives a connection row from an agent Host record. // Agents have no pause toggle and no scope — reports are all-or-nothing — // so capability flags are off. -func buildAgentConnection(host models.Host, expectedAgentVersion string, now time.Time) Connection { +func buildAgentConnection(host models.Host, expectedAgentVersion string, now time.Time, desiredConfig *ConnectionFleetConfigFingerprint) Connection { name := host.DisplayName if strings.TrimSpace(name) == "" { name = host.Hostname @@ -399,7 +401,7 @@ func buildAgentConnection(host models.Host, expectedAgentVersion string, now tim AgentUpdateAvailable: updateAvailable, Capabilities: ConnectionCapabilities{SupportsPause: false, SupportsScope: false, SupportsTest: false}, }, now) - conn.Fleet.ConfigDrift = connectionFleetAgentConfigDrift(conn, host) + conn.Fleet.ConfigDrift = connectionFleetAgentConfigDrift(conn, desiredConfig) conn.Fleet.CredentialHealth = connectionFleetAgentCredentialHealth(conn, host, now) conn.Fleet.CommandPolicy = connectionFleetAgentCommandPolicy(conn, host) conn.Fleet.Rollout = connectionFleetRollout(conn) @@ -548,7 +550,11 @@ func connectionFleetConfigDrift(conn Connection) *ConnectionFleetConfigDrift { } } -func connectionFleetAgentConfigDrift(conn Connection, host models.Host) *ConnectionFleetConfigDrift { +func connectionFleetAgentConfigDrift(conn Connection, desired *ConnectionFleetConfigFingerprint) *ConnectionFleetConfigDrift { + return connectionFleetAgentConfigDriftForFingerprints(conn, desired, nil) +} + +func connectionFleetAgentConfigDriftForFingerprints(conn Connection, desired, applied *ConnectionFleetConfigFingerprint) *ConnectionFleetConfigDrift { if !conn.Enabled || conn.State == ConnectionStatePaused { return &ConnectionFleetConfigDrift{ Status: fleetStatePaused, @@ -556,30 +562,41 @@ func connectionFleetAgentConfigDrift(conn Connection, host models.Host) *Connect } } + if desired == nil { + return &ConnectionFleetConfigDrift{ + Status: fleetStateUnknown, + Reason: "Pulse has not resolved canonical desired agent configuration metadata", + } + } + if conn.LastSeen == nil { return &ConnectionFleetConfigDrift{ - Status: fleetStateUnknown, - Reason: "Pulse has not received an applied agent configuration report yet", + Status: fleetStateUnknown, + Desired: desired, + Reason: "Pulse has not received an agent report to compare against desired configuration", } } - applied := connectionConfigFingerprint(connectionAgentConfigFingerprintVersion, map[string]any{ - "commandsEnabled": host.CommandsEnabled, - "diskExclude": host.DiskExclude, - }) if applied == nil { return &ConnectionFleetConfigDrift{ - Status: fleetStateUnknown, - Reason: "applied agent configuration fingerprint could not be derived", + Status: fleetStatePending, + Desired: desired, + Reason: "Pulse has not received a comparable applied agent configuration fingerprint yet", } } + status := fleetConfigDriftCurrent + reason := "reported applied agent configuration matches the desired fleet policy" + if desired.Version != applied.Version || desired.Hash != applied.Hash { + status = fleetConfigDriftDrifted + reason = "desired agent configuration fingerprint differs from the reported applied fingerprint" + } return &ConnectionFleetConfigDrift{ - Status: fleetConfigDriftCurrent, - Desired: applied, + Status: status, + Desired: desired, Applied: applied, LastObservedAt: conn.LastSeen, - Reason: "reported agent configuration matches the active fleet policy snapshot", + Reason: reason, } } @@ -617,11 +634,26 @@ func connectionFleetRollout(conn Connection) *ConnectionFleetRolloutState { Reason: "waiting for the agent to report applied configuration", } } - if conn.Fleet.ConfigDrift != nil && conn.Fleet.ConfigDrift.Status == fleetConfigDriftDrifted { - return &ConnectionFleetRolloutState{ - Status: fleetStatePending, - Stage: fleetRolloutStagePending, - Reason: "desired configuration has not converged on the reported runtime", + if conn.Type == ConnectionTypeAgent && conn.Fleet.ConfigDrift != nil { + switch conn.Fleet.ConfigDrift.Status { + case fleetConfigDriftDrifted: + return &ConnectionFleetRolloutState{ + Status: fleetStatePending, + Stage: fleetRolloutStagePending, + Reason: "desired configuration has not converged on the reported runtime", + } + case fleetStatePending: + return &ConnectionFleetRolloutState{ + Status: fleetStatePending, + Stage: fleetRolloutStagePending, + Reason: "waiting for the agent to report an applied configuration fingerprint", + } + case fleetStateUnknown: + return &ConnectionFleetRolloutState{ + Status: fleetStateUnknown, + Stage: fleetRolloutStagePending, + Reason: "rollout state cannot be confirmed without comparable desired and applied agent config fingerprints", + } } } stage := fleetRolloutStageLocal @@ -755,6 +787,54 @@ func connectionConfigFingerprint(version string, payload any) *ConnectionFleetCo } } +func connectionAgentDesiredConfigFingerprints(monitor *monitoring.Monitor, hosts []models.Host) map[string]ConnectionFleetConfigFingerprint { + if monitor == nil || len(hosts) == 0 { + return nil + } + + fingerprints := make(map[string]ConnectionFleetConfigFingerprint, len(hosts)) + for _, host := range hosts { + hostID := strings.TrimSpace(host.ID) + if hostID == "" { + continue + } + cfg := monitor.GetHostAgentConfig(hostID) + if cfg.DesiredConfig == nil { + continue + } + if fp := connectionConfigFingerprintFromMetadata(cfg.DesiredConfig.Version, cfg.DesiredConfig.Hash); fp != nil { + fingerprints[hostID] = *fp + } + } + if len(fingerprints) == 0 { + return nil + } + return fingerprints +} + +func connectionAgentConfigFingerprintForHost(fingerprints map[string]ConnectionFleetConfigFingerprint, hostID string) *ConnectionFleetConfigFingerprint { + if len(fingerprints) == 0 { + return nil + } + fp, ok := fingerprints[strings.TrimSpace(hostID)] + if !ok { + return nil + } + return connectionConfigFingerprintFromMetadata(fp.Version, fp.Hash) +} + +func connectionConfigFingerprintFromMetadata(version, hash string) *ConnectionFleetConfigFingerprint { + version = strings.TrimSpace(version) + hash = strings.TrimSpace(hash) + if version == "" || hash == "" { + return nil + } + return &ConnectionFleetConfigFingerprint{ + Version: version, + Hash: hash, + } +} + func connectionProxmoxCredentialKind(user, password, tokenName, tokenValue string) string { if strings.TrimSpace(tokenName) != "" || strings.TrimSpace(tokenValue) != "" { return fleetCredentialKindToken diff --git a/internal/api/connections_aggregator_test.go b/internal/api/connections_aggregator_test.go index 227b90492..4e3e5be4a 100644 --- a/internal/api/connections_aggregator_test.go +++ b/internal/api/connections_aggregator_test.go @@ -8,10 +8,13 @@ import ( "github.com/rcourtman/pulse-go-rewrite/internal/config" "github.com/rcourtman/pulse-go-rewrite/internal/models" "github.com/rcourtman/pulse-go-rewrite/internal/monitoring" + "github.com/rcourtman/pulse-go-rewrite/internal/remoteconfig" ) func ptrTime(t time.Time) *time.Time { return &t } +func ptrBool(v bool) *bool { return &v } + func healthEntry(lastSuccess *time.Time, errMessage, errCategory string, breakerState string) monitoring.InstanceHealth { ps := monitoring.InstancePollStatus{LastSuccess: lastSuccess} if errMessage != "" { @@ -27,6 +30,18 @@ func healthEntry(lastSuccess *time.Time, errMessage, errCategory string, breaker } } +func desiredAgentConfigFingerprint(t *testing.T, commandsEnabled *bool, settings map[string]interface{}) ConnectionFleetConfigFingerprint { + t.Helper() + metadata, err := remoteconfig.BuildDesiredConfigMetadata(commandsEnabled, settings) + if err != nil { + t.Fatalf("BuildDesiredConfigMetadata: %v", err) + } + return ConnectionFleetConfigFingerprint{ + Version: metadata.Version, + Hash: metadata.Hash, + } +} + func TestDeriveConnectionState_Paused(t *testing.T) { state, reason, _, _ := deriveConnectionState(false, monitoring.InstanceHealth{}, time.Now()) if state != ConnectionStatePaused { @@ -360,6 +375,7 @@ func TestBuildConnections_AgentVersionUpdateAvailability(t *testing.T) { func TestBuildConnections_AgentFleetGovernance(t *testing.T) { now := time.Now() + currentDesired := desiredAgentConfigFingerprint(t, ptrBool(true), nil) in := aggregatorInputs{ hosts: []models.Host{ { @@ -380,6 +396,9 @@ func TestBuildConnections_AgentFleetGovernance(t *testing.T) { Hostname: "pending", }, }, + agentDesiredConfigs: map[string]ConnectionFleetConfigFingerprint{ + "current": currentDesired, + }, expectedAgentVersion: "6.0.2", now: now, } @@ -402,14 +421,14 @@ func TestBuildConnections_AgentFleetGovernance(t *testing.T) { t.Fatalf("current agent fleet governance = %+v", current) } if current.ConfigDrift == nil || - current.ConfigDrift.Status != fleetConfigDriftCurrent || + current.ConfigDrift.Status != fleetStatePending || current.ConfigDrift.Desired == nil || - current.ConfigDrift.Applied == nil || + current.ConfigDrift.Applied != nil || current.ConfigDrift.Desired.Version != connectionAgentConfigFingerprintVersion || - current.ConfigDrift.Desired.Hash != current.ConfigDrift.Applied.Hash { + current.ConfigDrift.Desired.Hash != currentDesired.Hash { t.Fatalf("current agent config drift = %+v", current.ConfigDrift) } - if current.Rollout == nil || current.Rollout.Status != fleetStateCurrent || current.Rollout.Stage != fleetRolloutStageApplied { + if current.Rollout == nil || current.Rollout.Status != fleetStatePending || current.Rollout.Stage != fleetRolloutStagePending { t.Fatalf("current agent rollout = %+v", current.Rollout) } if current.CommandPolicy == nil || @@ -449,6 +468,89 @@ func TestBuildConnections_AgentFleetGovernance(t *testing.T) { } } +func TestBuildConnections_AgentConfigDriftUsesCanonicalDesiredMetadataWithoutSelfComparing(t *testing.T) { + now := time.Now() + desired := desiredAgentConfigFingerprint(t, ptrBool(true), map[string]interface{}{ + "interval": "10s", + }) + in := aggregatorInputs{ + hosts: []models.Host{ + { + ID: "agent-1", + Hostname: "agent-1", + LastSeen: now, + CommandsEnabled: false, + DiskExclude: []string{"/dev/loop*"}, + }, + }, + agentDesiredConfigs: map[string]ConnectionFleetConfigFingerprint{ + "agent-1": desired, + }, + now: now, + } + + got := buildConnections(in) + if len(got) != 1 { + t.Fatalf("expected 1 connection, got %d", len(got)) + } + drift := got[0].Fleet.ConfigDrift + if drift == nil { + t.Fatal("expected agent config drift metadata") + } + if drift.Status != fleetStatePending { + t.Fatalf("config drift status = %q, want pending", drift.Status) + } + if drift.Desired == nil || *drift.Desired != desired { + t.Fatalf("desired config drift fingerprint = %+v, want %+v", drift.Desired, desired) + } + if drift.Applied != nil { + t.Fatalf("applied config fingerprint should be absent until agent reports a comparable fingerprint, got %+v", drift.Applied) + } + + selfCompared := connectionConfigFingerprint(connectionAgentConfigFingerprintVersion, map[string]any{ + "commandsEnabled": false, + "diskExclude": []string{"/dev/loop*"}, + }) + if selfCompared == nil { + t.Fatal("expected local self-comparison fingerprint to be derivable") + } + if drift.Desired.Hash == selfCompared.Hash { + t.Fatalf("desired config hash reused report-field fingerprint %q", drift.Desired.Hash) + } + if got[0].Fleet.Rollout == nil || got[0].Fleet.Rollout.Status == fleetStateCurrent { + t.Fatalf("rollout should not claim current without an applied config comparison, got %+v", got[0].Fleet.Rollout) + } +} + +func TestConnectionFleetAgentConfigDriftComparesAppliedFingerprintsWhenAvailable(t *testing.T) { + now := time.Now() + conn := Connection{ + Type: ConnectionTypeAgent, + State: ConnectionStateActive, + Enabled: true, + LastSeen: &now, + } + desired := &ConnectionFleetConfigFingerprint{Version: connectionAgentConfigFingerprintVersion, Hash: "sha256:desired"} + applied := &ConnectionFleetConfigFingerprint{Version: connectionAgentConfigFingerprintVersion, Hash: "sha256:applied"} + + drifted := connectionFleetAgentConfigDriftForFingerprints(conn, desired, applied) + if drifted.Status != fleetConfigDriftDrifted || + drifted.Desired != desired || + drifted.Applied != applied || + drifted.LastObservedAt == nil { + t.Fatalf("drifted config comparison = %+v", drifted) + } + + matchingApplied := &ConnectionFleetConfigFingerprint{Version: desired.Version, Hash: desired.Hash} + current := connectionFleetAgentConfigDriftForFingerprints(conn, desired, matchingApplied) + if current.Status != fleetConfigDriftCurrent || + current.Desired != desired || + current.Applied != matchingApplied || + current.LastObservedAt == nil { + t.Fatalf("current config comparison = %+v", current) + } +} + func TestBuildConnections_PlatformFleetGovernance(t *testing.T) { now := time.Now() lastSuccess := now.Add(-30 * time.Second) diff --git a/internal/api/connections_alerts.go b/internal/api/connections_alerts.go index 2395171d4..0f107db75 100644 --- a/internal/api/connections_alerts.go +++ b/internal/api/connections_alerts.go @@ -39,6 +39,7 @@ func buildAggregatorInputs(ctx context.Context, cfg *config.Config, persistence if monitor != nil { inputs.hosts = monitor.HostsSnapshot() + inputs.agentDesiredConfigs = connectionAgentDesiredConfigFingerprints(monitor, inputs.hosts) inputs.instanceHealth = instanceHealthByKey(monitor.SchedulerHealth()) inputs.availabilityStatuses = monitor.AvailabilityStatusSnapshot() } else {