mirror of https://github.com/rcourtman/Pulse.git, synced 2026-05-09 19:32:24 +00:00
Explain canonical monitored system status reasons
parent 3a6a376b33
commit 1e93ede11e

14 changed files with 625 additions and 47 deletions
@@ -234,6 +234,10 @@ That same ledger read now also carries backend-owned status explanation copy,
 and lifecycle-adjacent details must render it beside the counting rationale so
 operators can interpret warning, offline, and unknown states without inventing
 local status semantics.
+Those status details are now structured as well: lifecycle-adjacent consumers
+must preserve the canonical reason list from the ledger read so operators can
+see which grouped source or surface degraded and when it last reported,
+instead of only seeing a generic warning/offline paragraph.
 Lifecycle-adjacent workspace copy must also keep the same commercial framing:
 infrastructure operations may point operators to Pulse Pro for billing, but it
 must describe that boundary in monitored-system, plan-limit, and license-status
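
As a sketch of the structured reason shape that hunk describes, using the
MonitoredSystemLedgerStatusReason struct and fixture values that appear later
in this commit:

	// One canonical status reason as it crosses the ledger API; the field
	// values below are taken from this commit's own test fixtures.
	reason := MonitoredSystemLedgerStatusReason{
		Kind:     "source-stale",
		Name:     "Tower",
		Type:     "host",
		Source:   "agent",
		Status:   "stale",
		LastSeen: "2026-03-23T11:55:00Z",
		Summary:  "Agent data for Tower is stale (last reported 2026-03-23T11:55:00Z).",
	}

Each reason names the degraded grouped source or surface, its normalized
status, and its last-seen timestamp, which is exactly what the prose above
asks lifecycle-adjacent consumers to preserve.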
@@ -243,6 +243,11 @@ That same contract now also owns the backend-authored status explanation paired
 with that enum, and the monitored-system ledger details surface must render it
 alongside the counting explanation instead of inventing page-local wording for
 what online, warning, offline, or unknown means.
+That nested status explanation is now a structured contract, not summary-only
+copy: `/api/license/monitored-system-ledger` must preserve the canonical
+summary plus the ordered reason list from unified resources, including the
+degraded source or surface, its status, and its last-seen timestamp, so mixed
+fresh/stale grouped systems remain explainable through one governed API shape.
 That client contract must also fail closed when older or partial payloads omit
 the nested explanation object: the frontend may normalize missing explanation
 fields to empty reasons/surfaces plus a safe default summary, but it must not
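
A minimal sketch of that fail-closed rule, condensing the NormalizeCollections
and default-summary helpers added later in this commit (the normalizeEntry
name is illustrative, not part of the commit):

	func normalizeEntry(e MonitoredSystemLedgerEntry) MonitoredSystemLedgerEntry {
		// Absent reasons normalize to an empty slice; nothing is fabricated.
		if e.StatusExplanation.Reasons == nil {
			e.StatusExplanation.Reasons = []MonitoredSystemLedgerStatusReason{}
		}
		// An absent summary falls back to safe default copy, but a canonical
		// backend-authored summary always wins when present.
		if e.StatusExplanation.Summary == "" {
			e.StatusExplanation.Summary = "Pulse cannot determine a canonical runtime status for this monitored system yet."
		}
		return e
	}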
@@ -215,6 +215,10 @@ view may normalize a safe default when that field is absent during mixed-version
 rollouts, but it must render the canonical backend explanation when present
 instead of inventing page-local wording for what warning, offline, or unknown
 means on a counted monitored system.
+That same cloud-paid surface must now also render the canonical status reason
+list when present, so customers can see exactly which grouped source or
+top-level surface degraded and when it last reported rather than only reading
+generic status copy beside a fresh aggregate `Last Seen` value.
 Frontend billing/admin surfaces must not synthesize `plan_version` from
 subscription lifecycle state. When a hosted billing record lacks a plan label,
 the UI must preserve that absence instead of fabricating values like `active`
@@ -370,6 +370,12 @@ storage-adjacent API wiring may consume the canonical monitored-system ledger
 and monitored-system cap helpers, but it must not revive deleted agent-era
 helper names or imply that API-backed infrastructure sits outside the counted
 system model.
+That same shared `internal/api/` dependency now also assumes monitored-system
+ledger status details stay canonical and source-aware: storage- or recovery-
+adjacent consumers may read the ledger’s nested status explanation, but they
+must preserve the backend-provided reason list for stale or offline grouped
+sources instead of reducing those mixed fresh/stale system states back to a
+generic label.
 That same shared `internal/api/` dependency now also assumes self-hosted
 commercial counting is canonical at the top-level monitored-system boundary:
 shared setup, deploy, entitlement, and API-backed monitoring helpers may not
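
A sketch of what preserving that reason list means for such a consumer,
assuming the ledger entry types from this commit (the rendering loop itself is
hypothetical):

	// Keep every backend-provided reason; never collapse them to one label.
	for _, reason := range entry.StatusExplanation.Reasons {
		fmt.Printf("%s [%s via %s, last seen %s]: %s\n",
			reason.Name, reason.Status, reason.Source, reason.LastSeen, reason.Summary)
	}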
@@ -171,6 +171,14 @@ grouping reasons plus included top-level surfaces, and fall back to an
 explicit standalone explanation when no cross-source merge occurred. Support
 and billing surfaces must consume that shared explanation contract instead of
 reconstructing count reasons from API-local heuristics.
+That same monitored-system contract now also owns canonical runtime-status
+explanations. When a grouped monitored system resolves to warning, offline, or
+unknown, unified resources must expose the shared summary plus structured
+degraded-status reasons derived from the grouped top-level resources and their
+source freshness state, including which source or surface degraded and the
+corresponding last-seen timestamp. Billing and support surfaces must consume
+that shared reason list instead of trying to infer why a fresh overall
+`last_seen` can still coincide with warning status.

 The unified-resource runtime now also owns the durable change timeline for the
 canonical resource view. `internal/unifiedresources/monitor_adapter.go` feeds
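
A condensed sketch of that derivation, following the
monitoredSystemResourceStatusReasons helper added later in this commit
(sortedSourceKeys stands in for the commit's inline sort):

	// Each non-online grouped source contributes one structured reason, which
	// is why a fresh aggregate last_seen can coexist with a warning status.
	for _, source := range sortedSourceKeys(resource.SourceStatus) {
		ss := resource.SourceStatus[source]
		status := normalizeMonitoredSystemSourceStatus(ss.Status) // "stale", "offline", "unknown"
		if status == "online" {
			continue // healthy sources never generate a reason
		}
		reasons = append(reasons, MonitoredSystemStatusReason{
			Kind:     "source-" + status, // e.g. "source-stale"
			Name:     name,
			Type:     resourceType,
			Source:   string(source),
			Status:   status,
			LastSeen: ss.LastSeen,
			Summary:  monitoredSystemSourceStatusReasonSummary(name, source, status, ss.LastSeen),
		})
	}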
@@ -38,6 +38,7 @@ describe('MonitoredSystemLedgerAPI', () => {
          status: 'online',
          status_explanation: {
            summary: 'All included top-level collection paths currently report online status.',
+           reasons: [],
          },
          last_seen: '2026-01-01T00:00:00Z',
          source: 'agent',
@@ -63,6 +64,7 @@ describe('MonitoredSystemLedgerAPI', () => {

     expect(result.systems[0]?.explanation.summary).toContain('Counts as one monitored system');
     expect(result.systems[0]?.status_explanation?.summary).toContain('currently report online');
+    expect(result.systems[0]?.status_explanation?.reasons).toEqual([]);
     expect(result.systems[0]?.explanation.reasons).toHaveLength(1);
     expect(result.systems[0]?.explanation.surfaces).toHaveLength(1);
   });
@@ -86,6 +88,7 @@ describe('MonitoredSystemLedgerAPI', () => {

     expect(result.systems[0]?.explanation.summary).toContain('counts this top-level collection path');
     expect(result.systems[0]?.status_explanation?.summary).toContain('currently report online');
+    expect(result.systems[0]?.status_explanation?.reasons).toEqual([]);
     expect(result.systems[0]?.explanation.reasons).toEqual([]);
     expect(result.systems[0]?.explanation.surfaces).toEqual([]);
   });
@@ -110,6 +113,50 @@ describe('MonitoredSystemLedgerAPI', () => {
     expect(result.systems[0]?.status).toBe('warning');
   });

+  it('preserves canonical status explanation reasons from the API contract', async () => {
+    vi.mocked(apiFetchJSON).mockResolvedValueOnce({
+      systems: [
+        {
+          name: 'Tower',
+          type: 'host',
+          status: 'warning',
+          status_explanation: {
+            summary: 'At least one included source is stale, so Pulse marks this monitored system as warning.',
+            reasons: [
+              {
+                kind: 'source-stale',
+                name: 'Tower',
+                type: 'host',
+                source: 'agent',
+                status: 'stale',
+                last_seen: '2026-03-23T11:55:00Z',
+                summary: 'Agent data for Tower is stale (last reported 2026-03-23T11:55:00Z).',
+              },
+            ],
+          },
+          last_seen: '2026-03-23T11:59:50Z',
+          source: 'multiple',
+        },
+      ],
+      total: 1,
+      limit: 5,
+    });
+
+    const result = await MonitoredSystemLedgerAPI.getLedger();
+
+    expect(result.systems[0]?.status_explanation?.reasons).toEqual([
+      {
+        kind: 'source-stale',
+        name: 'Tower',
+        type: 'host',
+        source: 'agent',
+        status: 'stale',
+        last_seen: '2026-03-23T11:55:00Z',
+        summary: 'Agent data for Tower is stale (last reported 2026-03-23T11:55:00Z).',
+      },
+    ]);
+  });
+
   it('fails closed to unknown for unsupported status values', async () => {
     vi.mocked(apiFetchJSON).mockResolvedValueOnce({
       systems: [
@@ -22,6 +22,23 @@ export interface MonitoredSystemLedgerExplanation {

 export interface MonitoredSystemLedgerStatusExplanation {
   summary: string;
+  reasons: MonitoredSystemLedgerStatusReason[];
 }

+export type MonitoredSystemLedgerStatusReasonStatus =
+  | 'online'
+  | 'stale'
+  | 'offline'
+  | 'unknown';
+
+export interface MonitoredSystemLedgerStatusReason {
+  kind: string;
+  name: string;
+  type: string;
+  source: string;
+  status: MonitoredSystemLedgerStatusReasonStatus;
+  last_seen: string;
+  summary: string;
+}
+
 export interface MonitoredSystemLedgerEntry {
@@ -62,6 +79,7 @@ function normalizeMonitoredSystemLedgerEntry(
     status,
     status_explanation: {
       summary: entry.status_explanation?.summary ?? defaultMonitoredSystemStatusExplanation(status),
+      reasons: (entry.status_explanation?.reasons ?? []).map(normalizeMonitoredSystemLedgerStatusReason),
     },
     explanation: {
       summary:
@@ -92,10 +110,34 @@ function defaultMonitoredSystemStatusExplanation(status: MonitoredSystemLedgerSt
     case 'online':
       return 'All included top-level collection paths currently report online status.';
     case 'warning':
-      return 'At least one included top-level collection path is degraded or stale.';
+      return 'At least one included top-level collection path is degraded, so Pulse marks this monitored system as warning.';
     case 'offline':
-      return 'At least one included top-level collection path is offline or disconnected.';
+      return 'At least one included source is offline or disconnected, so Pulse marks this monitored system as offline.';
     default:
       return 'Pulse cannot determine a canonical runtime status for this monitored system yet.';
   }
 }
+
+function normalizeMonitoredSystemLedgerStatusReason(
+  reason: MonitoredSystemLedgerStatusReason,
+): MonitoredSystemLedgerStatusReason {
+  return {
+    ...reason,
+    status: normalizeMonitoredSystemLedgerStatusReasonStatus(reason.status),
+    last_seen: reason.last_seen ?? '',
+  };
+}
+
+function normalizeMonitoredSystemLedgerStatusReasonStatus(
+  status: MonitoredSystemLedgerStatusReasonStatus | string | null | undefined,
+): MonitoredSystemLedgerStatusReasonStatus {
+  switch ((status ?? '').trim().toLowerCase()) {
+    case 'online':
+    case 'stale':
+    case 'offline':
+    case 'unknown':
+      return (status ?? '').trim().toLowerCase() as MonitoredSystemLedgerStatusReasonStatus;
+    default:
+      return 'unknown';
+  }
+}
@@ -51,6 +51,7 @@ function systemStatusExplanation(system: MonitoredSystemLedgerEntry): MonitoredS
     summary:
       system.status_explanation?.summary ??
       'Pulse cannot determine a canonical runtime status for this monitored system yet.',
+    reasons: system.status_explanation?.reasons ?? [],
   };
 }

@@ -179,6 +180,13 @@ export function MonitoredSystemLedgerPanel(props: MonitoredSystemLedgerPanelProp
           <p class="whitespace-normal text-base-content">
             {statusExplanation.summary}
           </p>
+          <Show when={statusExplanation.reasons.length > 0}>
+            <ul class="space-y-1 whitespace-normal text-base-content">
+              <For each={statusExplanation.reasons}>
+                {(reason) => <li>{reason.summary}</li>}
+              </For>
+            </ul>
+          </Show>
         </div>
         <p class="whitespace-normal text-base-content">
           {explanation.summary}
@@ -77,6 +77,7 @@ describe('MonitoredSystemLedgerPanel', () => {
         status: 'online',
         status_explanation: {
           summary: 'All included top-level collection paths currently report online status.',
+          reasons: [],
         },
         last_seen: '2026-01-01T00:00:00Z',
         source: 'agent',
@@ -139,6 +140,7 @@ describe('MonitoredSystemLedgerPanel', () => {
         status: 'online',
         status_explanation: {
           summary: 'All included top-level collection paths currently report online status.',
+          reasons: [],
         },
         last_seen: '2026-01-01T00:00:00Z',
         source: 'agent',
@@ -161,7 +163,19 @@ describe('MonitoredSystemLedgerPanel', () => {
         status: 'offline',
         status_explanation: {
           summary:
-            'At least one included top-level collection path is offline or disconnected, so Pulse marks this monitored system as offline.',
+            'At least one included source is offline or disconnected, so Pulse marks this monitored system as offline.',
+          reasons: [
+            {
+              kind: 'source-offline',
+              name: 'server-b',
+              type: 'pbs-server',
+              source: 'pbs',
+              status: 'offline',
+              last_seen: '2026-01-01T23:55:00Z',
+              summary:
+                'PBS data for server-b is offline or disconnected (last reported 2026-01-01T23:55:00Z).',
+            },
+          ],
         },
         last_seen: '2026-01-02T00:00:00Z',
         source: 'pbs',
@@ -220,7 +234,12 @@ describe('MonitoredSystemLedgerPanel', () => {
     expect(screen.getByText('Current status')).toBeInTheDocument();
     expect(
       screen.getByText(
-        'At least one included top-level collection path is offline or disconnected, so Pulse marks this monitored system as offline.',
+        'At least one included source is offline or disconnected, so Pulse marks this monitored system as offline.',
       ),
     ).toBeInTheDocument();
+    expect(
+      screen.getByText(
+        'PBS data for server-b is offline or disconnected (last reported 2026-01-01T23:55:00Z).',
+      ),
+    ).toBeInTheDocument();
     expect(screen.getByText('Included collection paths')).toBeInTheDocument();
@@ -239,6 +258,7 @@ describe('MonitoredSystemLedgerPanel', () => {
         status: 'online',
         status_explanation: {
           summary: 'All included top-level collection paths currently report online status.',
+          reasons: [],
         },
         last_seen: '2026-01-01T00:00:00Z',
         source: 'agent',
@@ -581,7 +581,18 @@ func TestContract_MonitoredSystemLedgerJSONSnapshot(t *testing.T) {
 				Type:   "host",
 				Status: "warning",
 				StatusExplanation: MonitoredSystemLedgerStatusExplanation{
-					Summary: "At least one included top-level collection path is degraded or stale, so Pulse marks this monitored system as warning.",
+					Summary: "At least one included source is stale, so Pulse marks this monitored system as warning.",
+					Reasons: []MonitoredSystemLedgerStatusReason{
+						{
+							Kind:     "source-stale",
+							Name:     "Tower",
+							Type:     "host",
+							Source:   "agent",
+							Status:   "stale",
+							LastSeen: "2026-03-18T17:25:00Z",
+							Summary:  "Agent data for Tower is stale (last reported 2026-03-18T17:25:00Z).",
+						},
+					},
 				},
 				LastSeen: "2026-03-18T17:30:00Z",
 				Source:   "agent",
@@ -620,7 +631,18 @@ func TestContract_MonitoredSystemLedgerJSONSnapshot(t *testing.T) {
 			"type":"host",
 			"status":"warning",
 			"status_explanation":{
-				"summary":"At least one included top-level collection path is degraded or stale, so Pulse marks this monitored system as warning."
+				"summary":"At least one included source is stale, so Pulse marks this monitored system as warning.",
+				"reasons":[
+					{
+						"kind":"source-stale",
+						"name":"Tower",
+						"type":"host",
+						"source":"agent",
+						"status":"stale",
+						"last_seen":"2026-03-18T17:25:00Z",
+						"summary":"Agent data for Tower is stale (last reported 2026-03-18T17:25:00Z)."
+					}
+				]
 			},
 			"last_seen":"2026-03-18T17:30:00Z",
 			"source":"agent",
@@ -23,7 +23,18 @@ type MonitoredSystemLedgerEntry struct {
 }

 type MonitoredSystemLedgerStatusExplanation struct {
-	Summary string `json:"summary"`
+	Summary string                              `json:"summary"`
+	Reasons []MonitoredSystemLedgerStatusReason `json:"reasons"`
 }

+type MonitoredSystemLedgerStatusReason struct {
+	Kind     string `json:"kind"`
+	Name     string `json:"name"`
+	Type     string `json:"type"`
+	Source   string `json:"source"`
+	Status   string `json:"status"`
+	LastSeen string `json:"last_seen"`
+	Summary  string `json:"summary"`
+}
+
 type MonitoredSystemLedgerExplanation struct {
@@ -66,6 +77,9 @@ func (r MonitoredSystemLedgerResponse) NormalizeCollections() MonitoredSystemLed
 }

 func (e MonitoredSystemLedgerEntry) NormalizeCollections() MonitoredSystemLedgerEntry {
+	if e.StatusExplanation.Reasons == nil {
+		e.StatusExplanation.Reasons = []MonitoredSystemLedgerStatusReason{}
+	}
 	if e.Explanation.Reasons == nil {
 		e.Explanation.Reasons = []MonitoredSystemLedgerExplanationReason{}
 	}
@@ -107,7 +121,7 @@ func (r *Router) handleMonitoredSystemLedger(w http.ResponseWriter, req *http.Re
 			Name:              system.Name,
 			Type:              system.Type,
 			Status:            status,
-			StatusExplanation: monitoredSystemLedgerStatusExplanation(status),
+			StatusExplanation: monitoredSystemLedgerStatusExplanation(system.StatusExplanation, status),
 			LastSeen:          formatLastSeen(system.LastSeen),
 			Source:            system.Source,
 			Explanation:       monitoredSystemLedgerExplanation(system.Explanation),
@@ -138,24 +152,53 @@ func normalizeStatus(s string) string {
 	}
 }

-func monitoredSystemLedgerStatusExplanation(status string) MonitoredSystemLedgerStatusExplanation {
+func monitoredSystemLedgerStatusExplanation(
+	explanation unifiedresources.MonitoredSystemStatusExplanation,
+	status string,
+) MonitoredSystemLedgerStatusExplanation {
+	reasons := make([]MonitoredSystemLedgerStatusReason, 0, len(explanation.Reasons))
+	for _, reason := range explanation.Reasons {
+		reasons = append(reasons, MonitoredSystemLedgerStatusReason{
+			Kind:     reason.Kind,
+			Name:     reason.Name,
+			Type:     reason.Type,
+			Source:   reason.Source,
+			Status:   normalizeMonitoredSystemLedgerReasonStatus(reason.Status),
+			LastSeen: formatLastSeen(reason.LastSeen),
+			Summary:  reason.Summary,
+		})
+	}
+
+	summary := explanation.Summary
+	if summary == "" {
+		summary = defaultMonitoredSystemLedgerStatusSummary(status)
+	}
+
+	return MonitoredSystemLedgerStatusExplanation{
+		Summary: summary,
+		Reasons: reasons,
+	}
+}
+
+func defaultMonitoredSystemLedgerStatusSummary(status string) string {
 	switch status {
 	case "online":
-		return MonitoredSystemLedgerStatusExplanation{
-			Summary: "All included top-level collection paths currently report online status.",
-		}
+		return "All included top-level collection paths currently report online status."
 	case "warning":
-		return MonitoredSystemLedgerStatusExplanation{
-			Summary: "At least one included top-level collection path is degraded or stale, so Pulse marks this monitored system as warning.",
-		}
+		return "At least one included top-level collection path is degraded, so Pulse marks this monitored system as warning."
 	case "offline":
-		return MonitoredSystemLedgerStatusExplanation{
-			Summary: "At least one included top-level collection path is offline or disconnected, so Pulse marks this monitored system as offline.",
-		}
+		return "At least one included source is offline or disconnected, so Pulse marks this monitored system as offline."
 	default:
-		return MonitoredSystemLedgerStatusExplanation{
-			Summary: "Pulse cannot determine a canonical runtime status for this monitored system yet.",
-		}
+		return "Pulse cannot determine a canonical runtime status for this monitored system yet."
 	}
 }
+
+func normalizeMonitoredSystemLedgerReasonStatus(status string) string {
+	switch status {
+	case "online", "stale", "offline", "unknown":
+		return status
+	default:
+		return "unknown"
+	}
+}

@@ -6,6 +6,8 @@ import (
 	"net/http/httptest"
 	"testing"
+	"time"

+	"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
 )

 func TestMonitoredSystemLedgerEntryTypes(t *testing.T) {
@@ -15,6 +17,7 @@ func TestMonitoredSystemLedgerEntryTypes(t *testing.T) {
 		Status: "online",
 		StatusExplanation: MonitoredSystemLedgerStatusExplanation{
 			Summary: "All included top-level collection paths currently report online status.",
+			Reasons: []MonitoredSystemLedgerStatusReason{},
 		},
 		LastSeen: "2025-01-01T00:00:00Z",
 		Source:   "agent",
@@ -42,6 +45,9 @@ func TestMonitoredSystemLedgerEntryTypes(t *testing.T) {
 	if decoded.StatusExplanation.Summary == "" {
 		t.Errorf("status explanation mismatch: %+v", decoded.StatusExplanation)
 	}
+	if decoded.StatusExplanation.Reasons == nil {
+		t.Errorf("status explanation reasons mismatch: %+v", decoded.StatusExplanation)
+	}
 	if decoded.Source != "agent" {
 		t.Errorf("source mismatch: got %q", decoded.Source)
 	}
@@ -85,21 +91,31 @@ func TestFormatLastSeen(t *testing.T) {
 }

 func TestMonitoredSystemLedgerStatusExplanation(t *testing.T) {
-	tests := []struct {
-		status string
-		want   string
-	}{
-		{"online", "All included top-level collection paths currently report online status."},
-		{"warning", "At least one included top-level collection path is degraded or stale, so Pulse marks this monitored system as warning."},
-		{"offline", "At least one included top-level collection path is offline or disconnected, so Pulse marks this monitored system as offline."},
-		{"unknown", "Pulse cannot determine a canonical runtime status for this monitored system yet."},
+	got := monitoredSystemLedgerStatusExplanation(unifiedresources.MonitoredSystemStatusExplanation{
+		Summary: "At least one included source is stale, so Pulse marks this monitored system as warning.",
+		Reasons: []unifiedresources.MonitoredSystemStatusReason{
+			{
+				Kind:     "source-stale",
+				Name:     "Tower",
+				Type:     "host",
+				Source:   "agent",
+				Status:   "stale",
+				LastSeen: time.Date(2026, 3, 23, 11, 55, 0, 0, time.UTC),
+				Summary:  "Agent data for Tower is stale (last reported 2026-03-23T11:55:00Z).",
+			},
+		},
+	}, "warning")
+	if got.Summary != "At least one included source is stale, so Pulse marks this monitored system as warning." {
+		t.Fatalf("unexpected status summary: %+v", got)
 	}

-	for _, tt := range tests {
-		got := monitoredSystemLedgerStatusExplanation(tt.status)
-		if got.Summary != tt.want {
-			t.Errorf("monitoredSystemLedgerStatusExplanation(%q) = %q, want %q", tt.status, got.Summary, tt.want)
-		}
+	if len(got.Reasons) != 1 {
+		t.Fatalf("expected one status reason, got %+v", got)
+	}
+	if got.Reasons[0].Status != "stale" {
+		t.Fatalf("expected stale status reason, got %+v", got.Reasons[0])
+	}
+	if got.Reasons[0].LastSeen != "2026-03-23T11:55:00Z" {
+		t.Fatalf("expected formatted reason last_seen, got %+v", got.Reasons[0])
 	}
@@ -141,11 +157,17 @@ func TestMonitoredSystemLedgerNilSystemsBecomesEmptyArray(t *testing.T) {
 func TestMonitoredSystemLedgerEntryNormalizeCollections(t *testing.T) {
 	entry := MonitoredSystemLedgerEntry{
 		Name: "server-1",
+		StatusExplanation: MonitoredSystemLedgerStatusExplanation{
+			Summary: "Pulse cannot determine a canonical runtime status for this monitored system yet.",
+		},
 		Explanation: MonitoredSystemLedgerExplanation{
 			Summary: "Counts as one monitored system because Pulse sees one top-level host view from agent.",
 		},
 	}.NormalizeCollections()

+	if entry.StatusExplanation.Reasons == nil {
+		t.Fatal("expected status explanation reasons to normalize to an empty slice")
+	}
 	if entry.Explanation.Reasons == nil {
 		t.Fatal("expected explanation reasons to normalize to an empty slice")
 	}
@@ -166,6 +188,7 @@ func TestHandleMonitoredSystemLedgerHTTP(t *testing.T) {
 		Status: "online",
 		StatusExplanation: MonitoredSystemLedgerStatusExplanation{
 			Summary: "All included top-level collection paths currently report online status.",
+			Reasons: []MonitoredSystemLedgerStatusReason{},
 		},
 		LastSeen: "2025-01-01T00:00:00Z",
 		Source:   "agent",
@@ -206,6 +229,9 @@ func TestHandleMonitoredSystemLedgerHTTP(t *testing.T) {
 	if decoded.Systems[0].StatusExplanation.Summary == "" {
 		t.Errorf("expected status explanation summary, got %+v", decoded.Systems[0].StatusExplanation)
 	}
+	if decoded.Systems[0].StatusExplanation.Reasons == nil {
+		t.Errorf("expected status explanation reasons, got %+v", decoded.Systems[0].StatusExplanation)
+	}
 	if decoded.Systems[0].Explanation.Summary == "" {
 		t.Errorf("expected explanation summary, got %+v", decoded.Systems[0].Explanation)
 	}
@@ -42,15 +42,35 @@ type MonitoredSystemGroupingSurface struct {
 	Source string
 }

+// MonitoredSystemStatusExplanation explains why Pulse chose the canonical
+// monitored-system runtime status.
+type MonitoredSystemStatusExplanation struct {
+	Summary string
+	Reasons []MonitoredSystemStatusReason
+}
+
+// MonitoredSystemStatusReason captures one canonical degraded-status signal
+// that contributed to the monitored-system runtime status.
+type MonitoredSystemStatusReason struct {
+	Kind     string
+	Name     string
+	Type     string
+	Source   string
+	Status   string
+	LastSeen time.Time
+	Summary  string
+}
+
 // MonitoredSystemRecord describes a counted top-level monitored system after
 // canonical cross-view deduplication.
 type MonitoredSystemRecord struct {
-	Name        string
-	Type        string
-	Status      ResourceStatus
-	LastSeen    time.Time
-	Source      string
-	Explanation MonitoredSystemGroupingExplanation
+	Name              string
+	Type              string
+	Status            ResourceStatus
+	StatusExplanation MonitoredSystemStatusExplanation
+	LastSeen          time.Time
+	Source            string
+	Explanation       MonitoredSystemGroupingExplanation
 }

 // MonitoredSystemCount returns the number of top-level monitored systems after
@@ -152,13 +172,15 @@ func resolveMonitoredSystemTopLevelSystems(rs ReadState) TopLevelSystemResolver

 func monitoredSystemRecord(group monitoredSystemGroup) MonitoredSystemRecord {
 	resource := preferredMonitoredSystemResource(group.resources)
+	status := monitoredSystemStatus(group.resources)
 	record := MonitoredSystemRecord{
-		Name:        monitoredSystemDisplayName(group.resources, resource),
-		Type:        monitoredSystemType(resource),
-		Status:      monitoredSystemStatus(group.resources),
-		LastSeen:    monitoredSystemLastSeen(group.resources),
-		Source:      monitoredSystemSource(group.resources),
-		Explanation: normalizeMonitoredSystemGroupingExplanation(group.explanation),
+		Name:              monitoredSystemDisplayName(group.resources, resource),
+		Type:              monitoredSystemType(resource),
+		Status:            status,
+		StatusExplanation: monitoredSystemStatusExplanation(group.resources, status),
+		LastSeen:          monitoredSystemLastSeen(group.resources),
+		Source:            monitoredSystemSource(group.resources),
+		Explanation:       normalizeMonitoredSystemGroupingExplanation(group.explanation),
 	}
 	if record.Name == "" {
 		record.Name = "Unnamed system"
@@ -169,6 +191,10 @@ func monitoredSystemRecord(group monitoredSystemGroup) MonitoredSystemRecord {
 	if record.Status == "" {
 		record.Status = StatusUnknown
 	}
+	record.StatusExplanation = normalizeMonitoredSystemStatusExplanation(record.StatusExplanation)
+	if record.StatusExplanation.Summary == "" {
+		record.StatusExplanation.Summary = monitoredSystemStatusSummary(record.Status, record.StatusExplanation.Reasons)
+	}
 	if record.Source == "" {
 		record.Source = "unknown"
 	}
@@ -190,6 +216,15 @@ func normalizeMonitoredSystemGroupingExplanation(
 	return explanation
 }

+func normalizeMonitoredSystemStatusExplanation(
+	explanation MonitoredSystemStatusExplanation,
+) MonitoredSystemStatusExplanation {
+	if explanation.Reasons == nil {
+		explanation.Reasons = []MonitoredSystemStatusReason{}
+	}
+	return explanation
+}
+
 func monitoredSystemStandaloneExplanation(resources []*Resource) MonitoredSystemGroupingExplanation {
 	surfaces := monitoredSystemGroupingSurfaces(resources)
 	resource := preferredMonitoredSystemResource(resources)
@@ -412,6 +447,170 @@ func monitoredSystemStatus(resources []*Resource) ResourceStatus {
 	return best
 }

+func monitoredSystemStatusExplanation(
+	resources []*Resource,
+	status ResourceStatus,
+) MonitoredSystemStatusExplanation {
+	reasons := monitoredSystemStatusReasons(resources)
+	return normalizeMonitoredSystemStatusExplanation(MonitoredSystemStatusExplanation{
+		Summary: monitoredSystemStatusSummary(status, reasons),
+		Reasons: reasons,
+	})
+}
+
+func monitoredSystemStatusReasons(resources []*Resource) []MonitoredSystemStatusReason {
+	reasons := make([]MonitoredSystemStatusReason, 0)
+	for _, resource := range resources {
+		reasons = append(reasons, monitoredSystemResourceStatusReasons(resource)...)
+	}
+	sort.Slice(reasons, func(i, j int) bool {
+		if monitoredSystemStatusReasonPriority(reasons[i]) != monitoredSystemStatusReasonPriority(reasons[j]) {
+			return monitoredSystemStatusReasonPriority(reasons[i]) < monitoredSystemStatusReasonPriority(reasons[j])
+		}
+		if reasons[i].Name != reasons[j].Name {
+			return reasons[i].Name < reasons[j].Name
+		}
+		if reasons[i].Type != reasons[j].Type {
+			return reasons[i].Type < reasons[j].Type
+		}
+		if reasons[i].Source != reasons[j].Source {
+			return reasons[i].Source < reasons[j].Source
+		}
+		if !reasons[i].LastSeen.Equal(reasons[j].LastSeen) {
+			return reasons[i].LastSeen.Before(reasons[j].LastSeen)
+		}
+		return reasons[i].Summary < reasons[j].Summary
+	})
+	if reasons == nil {
+		return []MonitoredSystemStatusReason{}
+	}
+	return reasons
+}
+
+func monitoredSystemResourceStatusReasons(resource *Resource) []MonitoredSystemStatusReason {
+	if resource == nil {
+		return nil
+	}
+
+	name := monitoredSystemResourceDisplayName(resource)
+	if name == "" {
+		name = "Unnamed source"
+	}
+
+	resourceType := monitoredSystemType(resource)
+	if resourceType == "" {
+		resourceType = "system"
+	}
+
+	reasons := make([]MonitoredSystemStatusReason, 0)
+	if len(resource.SourceStatus) > 0 {
+		sourceKeys := make([]DataSource, 0, len(resource.SourceStatus))
+		for source := range resource.SourceStatus {
+			sourceKeys = append(sourceKeys, source)
+		}
+		sort.Slice(sourceKeys, func(i, j int) bool {
+			return sourceKeys[i] < sourceKeys[j]
+		})
+
+		for _, source := range sourceKeys {
+			sourceStatus := resource.SourceStatus[source]
+			normalizedStatus := normalizeMonitoredSystemSourceStatus(sourceStatus.Status)
+			if normalizedStatus == "online" {
+				continue
+			}
+			reasons = append(reasons, MonitoredSystemStatusReason{
+				Kind:     "source-" + normalizedStatus,
+				Name:     name,
+				Type:     resourceType,
+				Source:   string(source),
+				Status:   normalizedStatus,
+				LastSeen: sourceStatus.LastSeen,
+				Summary:  monitoredSystemSourceStatusReasonSummary(name, source, normalizedStatus, sourceStatus.LastSeen),
+			})
+		}
+	}
+
+	if len(reasons) > 0 {
+		return reasons
+	}
+
+	normalizedStatus := normalizeMonitoredSystemSourceStatus(string(resource.Status))
+	if normalizedStatus == "online" {
+		return nil
+	}
+
+	source := monitoredSystemPrimarySource(resource)
+	if source == "" {
+		source = "unknown"
+	}
+	return []MonitoredSystemStatusReason{
+		{
+			Kind:     "surface-" + normalizedStatus,
+			Name:     name,
+			Type:     resourceType,
+			Source:   source,
+			Status:   normalizedStatus,
+			LastSeen: resource.LastSeen,
+			Summary:  monitoredSystemSurfaceStatusReasonSummary(name, resourceType, source, normalizedStatus, resource.LastSeen),
+		},
+	}
+}
+
+func normalizeMonitoredSystemSourceStatus(status string) string {
+	switch strings.ToLower(strings.TrimSpace(status)) {
+	case "online":
+		return "online"
+	case "stale", "warning":
+		return "stale"
+	case "offline":
+		return "offline"
+	default:
+		return "unknown"
+	}
+}
+
+func monitoredSystemStatusSummary(status ResourceStatus, reasons []MonitoredSystemStatusReason) string {
+	switch status {
+	case StatusOnline:
+		return "All included top-level collection paths currently report online status."
+	case StatusWarning:
+		switch {
+		case monitoredSystemHasReasonStatus(reasons, "stale"):
+			return "At least one included source is stale, so Pulse marks this monitored system as warning."
+		case monitoredSystemHasReasonStatus(reasons, "offline"):
+			return "At least one included source is offline or disconnected, but the canonical grouped status currently resolves to warning."
+		default:
+			return "At least one included top-level collection path is degraded, so Pulse marks this monitored system as warning."
+		}
+	case StatusOffline:
+		return "At least one included source is offline or disconnected, so Pulse marks this monitored system as offline."
+	default:
+		return "Pulse cannot determine a canonical runtime status for this monitored system yet."
+	}
+}
+
+func monitoredSystemHasReasonStatus(reasons []MonitoredSystemStatusReason, status string) bool {
+	for _, reason := range reasons {
+		if reason.Status == status {
+			return true
+		}
+	}
+	return false
+}
+
+func monitoredSystemStatusReasonPriority(reason MonitoredSystemStatusReason) int {
+	switch reason.Status {
+	case "offline":
+		return 0
+	case "stale":
+		return 1
+	case "unknown":
+		return 2
+	default:
+		return 3
+	}
+}
+
 func monitoredSystemStatusPriority(status ResourceStatus) int {
 	switch status {
 	case StatusWarning:
@@ -427,6 +626,63 @@ func monitoredSystemStatusPriority(status ResourceStatus) int {
 	}
 }

+func monitoredSystemSourceStatusReasonSummary(
+	name string,
+	source DataSource,
+	status string,
+	lastSeen time.Time,
+) string {
+	subject := name
+	if strings.TrimSpace(subject) == "" {
+		subject = "this monitored system"
+	}
+
+	summary := monitoredSystemStatusSourceLabel(string(source)) + " data for " + subject
+	switch status {
+	case "stale":
+		summary += " is stale"
+	case "offline":
+		summary += " is offline or disconnected"
+	default:
+		summary += " does not report a canonical status yet"
+	}
+
+	if !lastSeen.IsZero() {
+		summary += " (last reported " + lastSeen.UTC().Format(time.RFC3339) + ")."
+		return summary
+	}
+	return summary + "."
+}
+
+func monitoredSystemSurfaceStatusReasonSummary(
+	name string,
+	resourceType string,
+	source string,
+	status string,
+	lastSeen time.Time,
+) string {
+	subject := name
+	if strings.TrimSpace(subject) == "" {
+		subject = "This monitored system"
+	}
+
+	summary := monitoredSystemGroupingTypeLabel(resourceType) + " view for " + subject + " currently reports "
+	switch status {
+	case "stale":
+		summary += "warning"
+	case "offline":
+		summary += "offline"
+	default:
+		summary += "unknown"
+	}
+	summary += " status from " + monitoredSystemStatusSourceLabel(source)
+	if !lastSeen.IsZero() {
+		summary += " (last reported " + lastSeen.UTC().Format(time.RFC3339) + ")."
+		return summary
+	}
+	return summary + "."
+}
+
 func monitoredSystemLastSeen(resources []*Resource) time.Time {
 	var lastSeen time.Time
 	for _, resource := range resources {
@@ -488,6 +744,29 @@ func monitoredSystemPrimarySource(resource *Resource) string {
 	return ""
 }

+func monitoredSystemStatusSourceLabel(value string) string {
+	switch strings.TrimSpace(value) {
+	case "agent":
+		return "Agent"
+	case "docker":
+		return "Docker"
+	case "kubernetes":
+		return "Kubernetes"
+	case "pbs":
+		return "PBS"
+	case "pmg":
+		return "PMG"
+	case "proxmox":
+		return "Proxmox"
+	case "truenas":
+		return "TrueNAS"
+	case "", "unknown":
+		return "Unknown source"
+	default:
+		return strings.TrimSpace(value)
+	}
+}
+
 func cloneStringSet(in map[string]struct{}) map[string]struct{} {
 	out := make(map[string]struct{}, len(in))
 	for key := range in {
@@ -261,6 +261,70 @@ func TestResourceRegistry_MonitoredSystemsSummarizeCanonicalTopLevelViews(t *tes
 	}
 }

+func TestMonitoredSystemsExplainsStaleGroupedSourceWhileLastSeenStaysFresh(t *testing.T) {
+	rr := NewRegistry(nil)
+	now := time.Date(2026, 3, 23, 12, 0, 0, 0, time.UTC)
+
+	agentResource := topLevelTestAgent("agent-host", "tower.local", "machine-1", "agent-1")
+	agentResource.LastSeen = now.Add(-5 * time.Minute)
+	dockerResource := topLevelTestDockerHost("docker-host", "tower.local", "docker-runtime-1", "agent-1")
+	dockerResource.LastSeen = now.Add(-10 * time.Second)
+
+	rr.IngestRecords(SourceAgent, []IngestRecord{
+		{
+			SourceID: "agent-host",
+			Resource: agentResource,
+		},
+	})
+	rr.IngestRecords(SourceDocker, []IngestRecord{
+		{
+			SourceID: "docker-host",
+			Resource: dockerResource,
+		},
+	})
+
+	rr.MarkStale(now, map[DataSource]time.Duration{
+		SourceAgent:  60 * time.Second,
+		SourceDocker: 60 * time.Second,
+	})
+
+	systems := MonitoredSystems(rr)
+	if len(systems) != 1 {
+		t.Fatalf("MonitoredSystems() returned %d systems, want 1", len(systems))
+	}
+
+	system := systems[0]
+	if system.Status != StatusWarning {
+		t.Fatalf("expected grouped monitored system status warning, got %+v", system)
+	}
+	if !system.LastSeen.Equal(dockerResource.LastSeen) {
+		t.Fatalf("expected grouped last_seen %s, got %s", dockerResource.LastSeen, system.LastSeen)
+	}
+	if system.StatusExplanation.Summary == "" {
+		t.Fatal("expected grouped monitored system status explanation summary")
+	}
+	if len(system.StatusExplanation.Reasons) != 1 {
+		t.Fatalf("expected one stale grouped-source reason, got %+v", system.StatusExplanation.Reasons)
+	}
+
+	reason := system.StatusExplanation.Reasons[0]
+	if reason.Kind != "source-stale" {
+		t.Fatalf("expected stale source reason kind, got %+v", reason)
+	}
+	if reason.Source != string(SourceAgent) {
+		t.Fatalf("expected agent source reason, got %+v", reason)
+	}
+	if reason.Status != "stale" {
+		t.Fatalf("expected stale reason status, got %+v", reason)
+	}
+	if !reason.LastSeen.Equal(agentResource.LastSeen) {
+		t.Fatalf("expected stale reason last_seen %s, got %s", agentResource.LastSeen, reason.LastSeen)
+	}
+	if reason.Summary == "" {
+		t.Fatalf("expected stale reason summary, got %+v", reason)
+	}
+}
+
 func TestResourceRegistry_IngestRecords_UnknownSource(t *testing.T) {
 	rr := NewRegistry(nil)
 	now := time.Date(2026, 2, 20, 12, 0, 0, 0, time.UTC)