From 2be14562eed3f8fbcfd33122e4f15fb2ae04470e Mon Sep 17 00:00:00 2001 From: rcourtman Date: Wed, 13 May 2026 23:36:17 +0100 Subject: [PATCH] Preserve infrastructure continuity on first login Ensure unified resource snapshots include recent standalone host-agent continuity so Infrastructure does not briefly undercount connected systems after login or restart. --- docs/release-control/v6/internal/status.json | 32 ++++++++++++- .../v6/internal/subsystems/monitoring.md | 6 +++ .../monitoring/canonical_guardrails_test.go | 5 +++ internal/monitoring/monitor.go | 45 ++++++++++++++++--- .../monitoring/monitor_unified_state_test.go | 30 +++++++++++++ 5 files changed, 112 insertions(+), 6 deletions(-) diff --git a/docs/release-control/v6/internal/status.json b/docs/release-control/v6/internal/status.json index e6d9c75fe..b8b8b3048 100644 --- a/docs/release-control/v6/internal/status.json +++ b/docs/release-control/v6/internal/status.json @@ -3366,6 +3366,11 @@ "path": "internal/monitoring/metrics_history.go", "kind": "file" }, + { + "repo": "pulse", + "path": "internal/monitoring/monitor.go", + "kind": "file" + }, { "repo": "pulse", "path": "internal/monitoring/monitor_backup_poll_test.go", @@ -3376,6 +3381,11 @@ "path": "internal/monitoring/monitor_backups.go", "kind": "file" }, + { + "repo": "pulse", + "path": "internal/monitoring/monitor_unified_state_test.go", + "kind": "file" + }, { "repo": "pulse", "path": "internal/monitoring/poll_providers.go", @@ -4401,7 +4411,7 @@ "status": "partial", "completion": { "state": "bounded-residual", - "summary": "Fleet governance and rollout control now has a first-class governed floor: /api/connections carries enrollment, liveness, version drift, adapter health, config rollout, credential status, update posture, and remote-control posture as a canonical fleet projection, and Infrastructure systems surfaces those facts as a central fleet-governance strip plus row-level attention signals without paid-surface or monitor-count gating. Deeper desired-vs-applied config drift, staged rollout operations, richer credential rotation state, and command-policy enforcement remain a named post-RC hardening track.", + "summary": "Fleet governance and rollout control now has a first-class governed floor: /api/connections carries enrollment, liveness, version drift, adapter health, config rollout, credential status, update posture, and remote-control posture as a canonical fleet projection, Infrastructure systems surfaces those facts as a central fleet-governance strip plus row-level attention signals without paid-surface or monitor-count gating, and first-login Infrastructure resource snapshots rehydrate recent standalone agent continuity instead of waiting for a fresh live report. Deeper desired-vs-applied config drift, staged rollout operations, richer credential rotation state, and command-policy enforcement remain a named post-RC hardening track.", "tracking": [ { "kind": "lane-followup", @@ -4496,6 +4506,26 @@ "repo": "pulse", "path": "internal/api/contract_test.go", "kind": "file" + }, + { + "repo": "pulse", + "path": "internal/api/resources.go", + "kind": "file" + }, + { + "repo": "pulse", + "path": "internal/monitoring/canonical_guardrails_test.go", + "kind": "file" + }, + { + "repo": "pulse", + "path": "internal/monitoring/monitor.go", + "kind": "file" + }, + { + "repo": "pulse", + "path": "internal/monitoring/monitor_unified_state_test.go", + "kind": "file" } ] }, diff --git a/docs/release-control/v6/internal/subsystems/monitoring.md b/docs/release-control/v6/internal/subsystems/monitoring.md index fc9044778..aa791ad2a 100644 --- a/docs/release-control/v6/internal/subsystems/monitoring.md +++ b/docs/release-control/v6/internal/subsystems/monitoring.md @@ -155,6 +155,12 @@ must emit canonical reason codes such as `supplemental_inventory_rebuild_pending` when usage cannot yet be resolved, so settings and support surfaces can show verification or recovery state without inventing their own readiness heuristics or falling back to a fake count. +That same continuity rule applies to canonical unified resource snapshots. +`internal/monitoring/monitor.go` must overlay recent standalone host-agent +continuity records onto `UnifiedResourceSnapshot()` and +`GetUnifiedReadStateOrSnapshot()` results, so first-login and post-restart +Infrastructure views retain the durable agent-backed systems Pulse already +knows about while live reports and supplemental providers catch up. That same monitoring owner also governs collector payload compatibility at the shared boundary. Podman container stats must honor Podman's compat payload when it exposes a direct CPU percentage and otherwise fall back to Podman's diff --git a/internal/monitoring/canonical_guardrails_test.go b/internal/monitoring/canonical_guardrails_test.go index 2da9b267f..f10419c42 100644 --- a/internal/monitoring/canonical_guardrails_test.go +++ b/internal/monitoring/canonical_guardrails_test.go @@ -268,6 +268,11 @@ func TestMonitoredSystemUsageReadinessGuardrailsRemainCanonical(t *testing.T) { "hostContinuityStore: config.NewHostContinuityStore(cfg.DataPath, nil),", "func (m *Monitor) HostsSnapshot() []models.Host {", "readState = m.readStateWithStandaloneHostContinuity(readState)", + "func (m *Monitor) unifiedStateViewWithStandaloneHostContinuity(view monitorUnifiedStateView) monitorUnifiedStateView {", + "view.readState = readState", + "view.resources = resources", + "func latestUnifiedResourceLastSeen(resources []unifiedresources.Resource) time.Time {", + "return m.unifiedStateViewWithStandaloneHostContinuity(monitorUnifiedStateView{", }, "monitored_system_usage.go": { "MonitoredSystemUsageUnavailableMonitorState", diff --git a/internal/monitoring/monitor.go b/internal/monitoring/monitor.go index 43a2c5cbc..0cea174d9 100644 --- a/internal/monitoring/monitor.go +++ b/internal/monitoring/monitor.go @@ -4056,6 +4056,10 @@ type monitorUnifiedStateView struct { freshness time.Time } +type unifiedResourceReadStateLister interface { + GetAll() []unifiedresources.Resource +} + func monitorUnifiedStateViewFromSnapshot(snapshot models.StateSnapshot) monitorUnifiedStateView { registry := unifiedresources.NewRegistry(nil) registry.IngestSnapshot(snapshot) @@ -4078,6 +4082,37 @@ func monitorUnifiedStateViewFromResources(resources []unifiedresources.Resource, } } +func (m *Monitor) unifiedStateViewWithStandaloneHostContinuity(view monitorUnifiedStateView) monitorUnifiedStateView { + if m == nil || view.readState == nil { + return view + } + + readState := m.readStateWithStandaloneHostContinuity(view.readState) + view.readState = readState + + lister, ok := readState.(unifiedResourceReadStateLister) + if !ok { + return view + } + + resources := lister.GetAll() + view.resources = resources + if view.freshness.IsZero() { + view.freshness = latestUnifiedResourceLastSeen(resources) + } + return view +} + +func latestUnifiedResourceLastSeen(resources []unifiedresources.Resource) time.Time { + var latest time.Time + for _, resource := range resources { + if resource.LastSeen.After(latest) { + latest = resource.LastSeen + } + } + return latest +} + func (m *Monitor) currentUnifiedStateView() monitorUnifiedStateView { if m == nil { return monitorUnifiedStateView{} @@ -4097,25 +4132,25 @@ func (m *Monitor) currentUnifiedStateView() monitorUnifiedStateView { m.mu.RUnlock() if store == nil { - return monitorUnifiedStateViewFromSnapshot(m.GetState()) + return m.unifiedStateViewWithStandaloneHostContinuity(monitorUnifiedStateViewFromSnapshot(m.GetState())) } resources := store.GetAll() freshness := unifiedResourceFreshness(store, state) if readState, ok := store.(unifiedresources.ReadState); ok { - return monitorUnifiedStateView{ + return m.unifiedStateViewWithStandaloneHostContinuity(monitorUnifiedStateView{ resources: resources, readState: readState, freshness: freshness, - } + }) } if len(resources) > 0 || state == nil { - return monitorUnifiedStateViewFromResources(resources, freshness) + return m.unifiedStateViewWithStandaloneHostContinuity(monitorUnifiedStateViewFromResources(resources, freshness)) } - return monitorUnifiedStateViewFromSnapshot(m.GetState()) + return m.unifiedStateViewWithStandaloneHostContinuity(monitorUnifiedStateViewFromSnapshot(m.GetState())) } func (m *Monitor) currentUnifiedResourceFreshness() time.Time { diff --git a/internal/monitoring/monitor_unified_state_test.go b/internal/monitoring/monitor_unified_state_test.go index 0ef21bc3d..1f575dd91 100644 --- a/internal/monitoring/monitor_unified_state_test.go +++ b/internal/monitoring/monitor_unified_state_test.go @@ -177,6 +177,36 @@ func TestMonitorUnifiedResourceSnapshotFallsBackToSnapshotWhenStoreEmpty(t *test } } +func TestMonitorUnifiedResourceSnapshotIncludesRecentStandaloneHostContinuity(t *testing.T) { + now := time.Date(2026, 5, 13, 14, 30, 0, 0, time.UTC) + store := config.NewHostContinuityStore(t.TempDir(), nil) + if err := store.Upsert(config.HostContinuityEntry{ + HostID: "host-1", + ReportHostID: "machine-1", + Hostname: "host-1.local", + DisplayName: "Host One", + MachineID: "machine-1", + AgentVersion: "6.0.0-rc.5", + LastSeen: now, + }); err != nil { + t.Fatalf("Upsert continuity: %v", err) + } + + m := &Monitor{ + state: models.NewState(), + resourceStore: unifiedresources.NewMonitorAdapter(unifiedresources.NewRegistry(nil)), + hostContinuityStore: store, + } + + resources, freshness := m.UnifiedResourceSnapshot() + if !hasUnifiedResourceName(resources, "Host One") { + t.Fatalf("expected continuity-backed resource in unified snapshot, got %#v", resources) + } + if freshness.IsZero() { + t.Fatal("expected continuity-backed snapshot to carry non-zero freshness") + } +} + func TestMonitorUnifiedResourceSnapshotPrefersStoreFreshness(t *testing.T) { state := models.NewState() state.UpsertHost(models.Host{