Explain canonical monitored system status reasons

This commit is contained in:
rcourtman 2026-03-23 22:55:09 +00:00
parent 3a6a376b33
commit 1e93ede11e
14 changed files with 625 additions and 47 deletions

View file

@ -234,6 +234,10 @@ That same ledger read now also carries backend-owned status explanation copy,
and lifecycle-adjacent details must render it beside the counting rationale so
operators can interpret warning, offline, and unknown states without inventing
local status semantics.
Those status details are now structured as well: lifecycle-adjacent consumers
must preserve the canonical reason list from the ledger read so operators can
see which grouped source or surface degraded and when it last reported,
instead of only seeing a generic warning/offline paragraph.
Lifecycle-adjacent workspace copy must also keep the same commercial framing:
infrastructure operations may point operators to Pulse Pro for billing, but it
must describe that boundary in monitored-system, plan-limit, and license-status

View file

@ -243,6 +243,11 @@ That same contract now also owns the backend-authored status explanation paired
with that enum, and the monitored-system ledger details surface must render it
alongside the counting explanation instead of inventing page-local wording for
what online, warning, offline, or unknown means.
That nested status explanation is now a structured contract, not summary-only
copy: `/api/license/monitored-system-ledger` must preserve the canonical
summary plus the ordered reason list from unified resources, including the
degraded source or surface, its status, and its last-seen timestamp, so mixed
fresh/stale grouped systems remain explainable through one governed API shape.
That client contract must also fail closed when older or partial payloads omit
the nested explanation object: the frontend may normalize missing explanation
fields to empty reasons/surfaces plus a safe default summary, but it must not

View file

@ -215,6 +215,10 @@ view may normalize a safe default when that field is absent during mixed-version
rollouts, but it must render the canonical backend explanation when present
instead of inventing page-local wording for what warning, offline, or unknown
means on a counted monitored system.
That same cloud-paid surface must now also render the canonical status reason
list when present, so customers can see exactly which grouped source or
top-level surface degraded and when it last reported rather than only reading
generic status copy beside a fresh aggregate `Last Seen` value.
Frontend billing/admin surfaces must not synthesize `plan_version` from
subscription lifecycle state. When a hosted billing record lacks a plan label,
the UI must preserve that absence instead of fabricating values like `active`

View file

@ -370,6 +370,12 @@ storage-adjacent API wiring may consume the canonical monitored-system ledger
and monitored-system cap helpers, but it must not revive deleted agent-era
helper names or imply that API-backed infrastructure sits outside the counted
system model.
That same shared `internal/api/` dependency now also assumes monitored-system
ledger status details stay canonical and source-aware: storage- or recovery-
adjacent consumers may read the ledger's nested status explanation, but they
must preserve the backend-provided reason list for stale or offline grouped
sources instead of reducing those mixed fresh/stale system states back to a
generic label.
That same shared `internal/api/` dependency now also assumes self-hosted
commercial counting is canonical at the top-level monitored-system boundary:
shared setup, deploy, entitlement, and API-backed monitoring helpers may not

View file

@ -171,6 +171,14 @@ grouping reasons plus included top-level surfaces, and fall back to an
explicit standalone explanation when no cross-source merge occurred. Support
and billing surfaces must consume that shared explanation contract instead of
reconstructing count reasons from API-local heuristics.
That same monitored-system contract now also owns canonical runtime-status
explanations. When a grouped monitored system resolves to warning, offline, or
unknown, unified resources must expose the shared summary plus structured
degraded-status reasons derived from the grouped top-level resources and their
source freshness state, including which source or surface degraded and the
corresponding last-seen timestamp. Billing and support surfaces must consume
that shared reason list instead of trying to infer why a fresh overall
`last_seen` can still coincide with warning status.
The unified-resource runtime now also owns the durable change timeline for the
canonical resource view. `internal/unifiedresources/monitor_adapter.go` feeds

View file

@ -38,6 +38,7 @@ describe('MonitoredSystemLedgerAPI', () => {
status: 'online',
status_explanation: {
summary: 'All included top-level collection paths currently report online status.',
reasons: [],
},
last_seen: '2026-01-01T00:00:00Z',
source: 'agent',
@ -63,6 +64,7 @@ describe('MonitoredSystemLedgerAPI', () => {
expect(result.systems[0]?.explanation.summary).toContain('Counts as one monitored system');
expect(result.systems[0]?.status_explanation?.summary).toContain('currently report online');
expect(result.systems[0]?.status_explanation?.reasons).toEqual([]);
expect(result.systems[0]?.explanation.reasons).toHaveLength(1);
expect(result.systems[0]?.explanation.surfaces).toHaveLength(1);
});
@ -86,6 +88,7 @@ describe('MonitoredSystemLedgerAPI', () => {
expect(result.systems[0]?.explanation.summary).toContain('counts this top-level collection path');
expect(result.systems[0]?.status_explanation?.summary).toContain('currently report online');
expect(result.systems[0]?.status_explanation?.reasons).toEqual([]);
expect(result.systems[0]?.explanation.reasons).toEqual([]);
expect(result.systems[0]?.explanation.surfaces).toEqual([]);
});
@ -110,6 +113,50 @@ describe('MonitoredSystemLedgerAPI', () => {
expect(result.systems[0]?.status).toBe('warning');
});
// Verifies the client passes the backend-provided status reason list through
// unchanged (kind/name/type/source/status/last_seen/summary) instead of
// re-deriving or collapsing it — even when the aggregate last_seen is fresh.
it('preserves canonical status explanation reasons from the API contract', async () => {
vi.mocked(apiFetchJSON).mockResolvedValueOnce({
systems: [
{
name: 'Tower',
type: 'host',
status: 'warning',
status_explanation: {
summary: 'At least one included source is stale, so Pulse marks this monitored system as warning.',
reasons: [
{
kind: 'source-stale',
name: 'Tower',
type: 'host',
source: 'agent',
status: 'stale',
last_seen: '2026-03-23T11:55:00Z',
summary: 'Agent data for Tower is stale (last reported 2026-03-23T11:55:00Z).',
},
],
},
last_seen: '2026-03-23T11:59:50Z',
source: 'multiple',
},
],
total: 1,
limit: 5,
});
const result = await MonitoredSystemLedgerAPI.getLedger();
// Exact structural equality: normalization must be a no-op for valid payloads.
expect(result.systems[0]?.status_explanation?.reasons).toEqual([
{
kind: 'source-stale',
name: 'Tower',
type: 'host',
source: 'agent',
status: 'stale',
last_seen: '2026-03-23T11:55:00Z',
summary: 'Agent data for Tower is stale (last reported 2026-03-23T11:55:00Z).',
},
]);
});
it('fails closed to unknown for unsupported status values', async () => {
vi.mocked(apiFetchJSON).mockResolvedValueOnce({
systems: [

View file

@ -22,6 +22,23 @@ export interface MonitoredSystemLedgerExplanation {
export interface MonitoredSystemLedgerStatusExplanation {
summary: string;
reasons: MonitoredSystemLedgerStatusReason[];
}
/**
 * Canonical per-reason status vocabulary accepted from the ledger API.
 * Values outside this union are normalized to 'unknown' by the client.
 */
export type MonitoredSystemLedgerStatusReasonStatus =
  | 'online'
  | 'stale'
  | 'offline'
  | 'unknown';

/** One backend-authored reason explaining why a monitored system is degraded. */
export interface MonitoredSystemLedgerStatusReason {
  kind: string; // reason category, e.g. 'source-stale'
  name: string; // display name of the degraded resource
  type: string; // resource type, e.g. 'host'
  source: string; // data source that degraded, e.g. 'agent'
  status: MonitoredSystemLedgerStatusReasonStatus;
  last_seen: string; // timestamp string; may be empty when never reported
  summary: string; // human-readable sentence rendered to operators
}
export interface MonitoredSystemLedgerEntry {
@ -62,6 +79,7 @@ function normalizeMonitoredSystemLedgerEntry(
status,
status_explanation: {
summary: entry.status_explanation?.summary ?? defaultMonitoredSystemStatusExplanation(status),
reasons: (entry.status_explanation?.reasons ?? []).map(normalizeMonitoredSystemLedgerStatusReason),
},
explanation: {
summary:
@ -92,10 +110,34 @@ function defaultMonitoredSystemStatusExplanation(status: MonitoredSystemLedgerSt
case 'online':
return 'All included top-level collection paths currently report online status.';
case 'warning':
return 'At least one included top-level collection path is degraded or stale.';
return 'At least one included top-level collection path is degraded, so Pulse marks this monitored system as warning.';
case 'offline':
return 'At least one included top-level collection path is offline or disconnected.';
return 'At least one included source is offline or disconnected, so Pulse marks this monitored system as offline.';
default:
return 'Pulse cannot determine a canonical runtime status for this monitored system yet.';
}
}
// Returns a copy of the reason with its status coerced onto the canonical
// enum and a missing last_seen normalized to the empty string.
function normalizeMonitoredSystemLedgerStatusReason(
  reason: MonitoredSystemLedgerStatusReason,
): MonitoredSystemLedgerStatusReason {
  const status = normalizeMonitoredSystemLedgerStatusReasonStatus(reason.status);
  const lastSeen = reason.last_seen ?? '';
  return { ...reason, status, last_seen: lastSeen };
}
// Coerces an arbitrary payload value onto the canonical reason-status enum,
// failing closed to 'unknown' for null, undefined, or unrecognized values.
//
// Fix: the original recomputed `status.trim().toLowerCase()` inside the
// matched case, which both duplicates work and dereferences `status` on a
// path where its declared type still admits null/undefined. Normalizing once
// and returning that value removes both issues without changing behavior.
function normalizeMonitoredSystemLedgerStatusReasonStatus(
  status: MonitoredSystemLedgerStatusReasonStatus | string | null | undefined,
): MonitoredSystemLedgerStatusReasonStatus {
  const normalized = (status ?? '').trim().toLowerCase();
  switch (normalized) {
    case 'online':
    case 'stale':
    case 'offline':
    case 'unknown':
      return normalized;
    default:
      return 'unknown';
  }
}

View file

@ -51,6 +51,7 @@ function systemStatusExplanation(system: MonitoredSystemLedgerEntry): MonitoredS
summary:
system.status_explanation?.summary ??
'Pulse cannot determine a canonical runtime status for this monitored system yet.',
reasons: system.status_explanation?.reasons ?? [],
};
}
@ -179,6 +180,13 @@ export function MonitoredSystemLedgerPanel(props: MonitoredSystemLedgerPanelProp
<p class="whitespace-normal text-base-content">
{statusExplanation.summary}
</p>
<Show when={statusExplanation.reasons.length > 0}>
<ul class="space-y-1 whitespace-normal text-base-content">
<For each={statusExplanation.reasons}>
{(reason) => <li>{reason.summary}</li>}
</For>
</ul>
</Show>
</div>
<p class="whitespace-normal text-base-content">
{explanation.summary}

View file

@ -77,6 +77,7 @@ describe('MonitoredSystemLedgerPanel', () => {
status: 'online',
status_explanation: {
summary: 'All included top-level collection paths currently report online status.',
reasons: [],
},
last_seen: '2026-01-01T00:00:00Z',
source: 'agent',
@ -139,6 +140,7 @@ describe('MonitoredSystemLedgerPanel', () => {
status: 'online',
status_explanation: {
summary: 'All included top-level collection paths currently report online status.',
reasons: [],
},
last_seen: '2026-01-01T00:00:00Z',
source: 'agent',
@ -161,7 +163,19 @@ describe('MonitoredSystemLedgerPanel', () => {
status: 'offline',
status_explanation: {
summary:
'At least one included top-level collection path is offline or disconnected, so Pulse marks this monitored system as offline.',
'At least one included source is offline or disconnected, so Pulse marks this monitored system as offline.',
reasons: [
{
kind: 'source-offline',
name: 'server-b',
type: 'pbs-server',
source: 'pbs',
status: 'offline',
last_seen: '2026-01-01T23:55:00Z',
summary:
'PBS data for server-b is offline or disconnected (last reported 2026-01-01T23:55:00Z).',
},
],
},
last_seen: '2026-01-02T00:00:00Z',
source: 'pbs',
@ -220,7 +234,12 @@ describe('MonitoredSystemLedgerPanel', () => {
expect(screen.getByText('Current status')).toBeInTheDocument();
expect(
screen.getByText(
'At least one included top-level collection path is offline or disconnected, so Pulse marks this monitored system as offline.',
'At least one included source is offline or disconnected, so Pulse marks this monitored system as offline.',
),
).toBeInTheDocument();
expect(
screen.getByText(
'PBS data for server-b is offline or disconnected (last reported 2026-01-01T23:55:00Z).',
),
).toBeInTheDocument();
expect(screen.getByText('Included collection paths')).toBeInTheDocument();
@ -239,6 +258,7 @@ describe('MonitoredSystemLedgerPanel', () => {
status: 'online',
status_explanation: {
summary: 'All included top-level collection paths currently report online status.',
reasons: [],
},
last_seen: '2026-01-01T00:00:00Z',
source: 'agent',

View file

@ -581,7 +581,18 @@ func TestContract_MonitoredSystemLedgerJSONSnapshot(t *testing.T) {
Type: "host",
Status: "warning",
StatusExplanation: MonitoredSystemLedgerStatusExplanation{
Summary: "At least one included top-level collection path is degraded or stale, so Pulse marks this monitored system as warning.",
Summary: "At least one included source is stale, so Pulse marks this monitored system as warning.",
Reasons: []MonitoredSystemLedgerStatusReason{
{
Kind: "source-stale",
Name: "Tower",
Type: "host",
Source: "agent",
Status: "stale",
LastSeen: "2026-03-18T17:25:00Z",
Summary: "Agent data for Tower is stale (last reported 2026-03-18T17:25:00Z).",
},
},
},
LastSeen: "2026-03-18T17:30:00Z",
Source: "agent",
@ -620,7 +631,18 @@ func TestContract_MonitoredSystemLedgerJSONSnapshot(t *testing.T) {
"type":"host",
"status":"warning",
"status_explanation":{
"summary":"At least one included top-level collection path is degraded or stale, so Pulse marks this monitored system as warning."
"summary":"At least one included source is stale, so Pulse marks this monitored system as warning.",
"reasons":[
{
"kind":"source-stale",
"name":"Tower",
"type":"host",
"source":"agent",
"status":"stale",
"last_seen":"2026-03-18T17:25:00Z",
"summary":"Agent data for Tower is stale (last reported 2026-03-18T17:25:00Z)."
}
]
},
"last_seen":"2026-03-18T17:30:00Z",
"source":"agent",

View file

@ -23,7 +23,18 @@ type MonitoredSystemLedgerEntry struct {
}
type MonitoredSystemLedgerStatusExplanation struct {
Summary string `json:"summary"`
Summary string `json:"summary"`
Reasons []MonitoredSystemLedgerStatusReason `json:"reasons"`
}
// MonitoredSystemLedgerStatusReason is the wire form of one degraded-status
// reason attached to a monitored-system ledger entry.
type MonitoredSystemLedgerStatusReason struct {
	Kind     string `json:"kind"`      // reason category, e.g. "source-stale" or "surface-offline"
	Name     string `json:"name"`      // display name of the degraded resource
	Type     string `json:"type"`      // resource type, e.g. "host"
	Source   string `json:"source"`    // data source that degraded, e.g. "agent", "pbs"
	Status   string `json:"status"`    // canonical value: "online", "stale", "offline", or "unknown"
	LastSeen string `json:"last_seen"` // formatted last-seen timestamp string; may be empty
	Summary  string `json:"summary"`   // human-readable sentence rendered to operators
}
type MonitoredSystemLedgerExplanation struct {
@ -66,6 +77,9 @@ func (r MonitoredSystemLedgerResponse) NormalizeCollections() MonitoredSystemLed
}
func (e MonitoredSystemLedgerEntry) NormalizeCollections() MonitoredSystemLedgerEntry {
if e.StatusExplanation.Reasons == nil {
e.StatusExplanation.Reasons = []MonitoredSystemLedgerStatusReason{}
}
if e.Explanation.Reasons == nil {
e.Explanation.Reasons = []MonitoredSystemLedgerExplanationReason{}
}
@ -107,7 +121,7 @@ func (r *Router) handleMonitoredSystemLedger(w http.ResponseWriter, req *http.Re
Name: system.Name,
Type: system.Type,
Status: status,
StatusExplanation: monitoredSystemLedgerStatusExplanation(status),
StatusExplanation: monitoredSystemLedgerStatusExplanation(system.StatusExplanation, status),
LastSeen: formatLastSeen(system.LastSeen),
Source: system.Source,
Explanation: monitoredSystemLedgerExplanation(system.Explanation),
@ -138,24 +152,53 @@ func normalizeStatus(s string) string {
}
}
func monitoredSystemLedgerStatusExplanation(status string) MonitoredSystemLedgerStatusExplanation {
// monitoredSystemLedgerStatusExplanation converts the unified-resources status
// explanation into the wire contract: reason timestamps are formatted, reason
// statuses are clamped to the canonical vocabulary, and a status-derived
// default summary fills in when the domain explanation carries none.
func monitoredSystemLedgerStatusExplanation(
	explanation unifiedresources.MonitoredSystemStatusExplanation,
	status string,
) MonitoredSystemLedgerStatusExplanation {
	reasons := make([]MonitoredSystemLedgerStatusReason, 0, len(explanation.Reasons))
	for _, reason := range explanation.Reasons {
		reasons = append(reasons, MonitoredSystemLedgerStatusReason{
			Kind:     reason.Kind,
			Name:     reason.Name,
			Type:     reason.Type,
			Source:   reason.Source,
			Status:   normalizeMonitoredSystemLedgerReasonStatus(reason.Status),
			LastSeen: formatLastSeen(reason.LastSeen),
			Summary:  reason.Summary,
		})
	}
	summary := explanation.Summary
	if summary == "" {
		// Fall back when unified resources supplied no summary text.
		summary = defaultMonitoredSystemLedgerStatusSummary(status)
	}
	return MonitoredSystemLedgerStatusExplanation{
		Summary: summary,
		Reasons: reasons,
	}
}
func defaultMonitoredSystemLedgerStatusSummary(status string) string {
switch status {
case "online":
return MonitoredSystemLedgerStatusExplanation{
Summary: "All included top-level collection paths currently report online status.",
}
return "All included top-level collection paths currently report online status."
case "warning":
return MonitoredSystemLedgerStatusExplanation{
Summary: "At least one included top-level collection path is degraded or stale, so Pulse marks this monitored system as warning.",
}
return "At least one included top-level collection path is degraded, so Pulse marks this monitored system as warning."
case "offline":
return MonitoredSystemLedgerStatusExplanation{
Summary: "At least one included top-level collection path is offline or disconnected, so Pulse marks this monitored system as offline.",
}
return "At least one included source is offline or disconnected, so Pulse marks this monitored system as offline."
default:
return MonitoredSystemLedgerStatusExplanation{
Summary: "Pulse cannot determine a canonical runtime status for this monitored system yet.",
}
return "Pulse cannot determine a canonical runtime status for this monitored system yet."
}
}
// normalizeMonitoredSystemLedgerReasonStatus clamps a reason status onto the
// canonical wire vocabulary, failing closed to "unknown" for anything else.
func normalizeMonitoredSystemLedgerReasonStatus(status string) string {
	for _, allowed := range [...]string{"online", "stale", "offline", "unknown"} {
		if status == allowed {
			return status
		}
	}
	return "unknown"
}

View file

@ -6,6 +6,8 @@ import (
"net/http/httptest"
"testing"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
)
func TestMonitoredSystemLedgerEntryTypes(t *testing.T) {
@ -15,6 +17,7 @@ func TestMonitoredSystemLedgerEntryTypes(t *testing.T) {
Status: "online",
StatusExplanation: MonitoredSystemLedgerStatusExplanation{
Summary: "All included top-level collection paths currently report online status.",
Reasons: []MonitoredSystemLedgerStatusReason{},
},
LastSeen: "2025-01-01T00:00:00Z",
Source: "agent",
@ -42,6 +45,9 @@ func TestMonitoredSystemLedgerEntryTypes(t *testing.T) {
if decoded.StatusExplanation.Summary == "" {
t.Errorf("status explanation mismatch: %+v", decoded.StatusExplanation)
}
if decoded.StatusExplanation.Reasons == nil {
t.Errorf("status explanation reasons mismatch: %+v", decoded.StatusExplanation)
}
if decoded.Source != "agent" {
t.Errorf("source mismatch: got %q", decoded.Source)
}
@ -85,21 +91,31 @@ func TestFormatLastSeen(t *testing.T) {
}
func TestMonitoredSystemLedgerStatusExplanation(t *testing.T) {
tests := []struct {
status string
want string
}{
{"online", "All included top-level collection paths currently report online status."},
{"warning", "At least one included top-level collection path is degraded or stale, so Pulse marks this monitored system as warning."},
{"offline", "At least one included top-level collection path is offline or disconnected, so Pulse marks this monitored system as offline."},
{"unknown", "Pulse cannot determine a canonical runtime status for this monitored system yet."},
got := monitoredSystemLedgerStatusExplanation(unifiedresources.MonitoredSystemStatusExplanation{
Summary: "At least one included source is stale, so Pulse marks this monitored system as warning.",
Reasons: []unifiedresources.MonitoredSystemStatusReason{
{
Kind: "source-stale",
Name: "Tower",
Type: "host",
Source: "agent",
Status: "stale",
LastSeen: time.Date(2026, 3, 23, 11, 55, 0, 0, time.UTC),
Summary: "Agent data for Tower is stale (last reported 2026-03-23T11:55:00Z).",
},
},
}, "warning")
if got.Summary != "At least one included source is stale, so Pulse marks this monitored system as warning." {
t.Fatalf("unexpected status summary: %+v", got)
}
for _, tt := range tests {
got := monitoredSystemLedgerStatusExplanation(tt.status)
if got.Summary != tt.want {
t.Errorf("monitoredSystemLedgerStatusExplanation(%q) = %q, want %q", tt.status, got.Summary, tt.want)
}
if len(got.Reasons) != 1 {
t.Fatalf("expected one status reason, got %+v", got)
}
if got.Reasons[0].Status != "stale" {
t.Fatalf("expected stale status reason, got %+v", got.Reasons[0])
}
if got.Reasons[0].LastSeen != "2026-03-23T11:55:00Z" {
t.Fatalf("expected formatted reason last_seen, got %+v", got.Reasons[0])
}
}
@ -141,11 +157,17 @@ func TestMonitoredSystemLedgerNilSystemsBecomesEmptyArray(t *testing.T) {
func TestMonitoredSystemLedgerEntryNormalizeCollections(t *testing.T) {
entry := MonitoredSystemLedgerEntry{
Name: "server-1",
StatusExplanation: MonitoredSystemLedgerStatusExplanation{
Summary: "Pulse cannot determine a canonical runtime status for this monitored system yet.",
},
Explanation: MonitoredSystemLedgerExplanation{
Summary: "Counts as one monitored system because Pulse sees one top-level host view from agent.",
},
}.NormalizeCollections()
if entry.StatusExplanation.Reasons == nil {
t.Fatal("expected status explanation reasons to normalize to an empty slice")
}
if entry.Explanation.Reasons == nil {
t.Fatal("expected explanation reasons to normalize to an empty slice")
}
@ -166,6 +188,7 @@ func TestHandleMonitoredSystemLedgerHTTP(t *testing.T) {
Status: "online",
StatusExplanation: MonitoredSystemLedgerStatusExplanation{
Summary: "All included top-level collection paths currently report online status.",
Reasons: []MonitoredSystemLedgerStatusReason{},
},
LastSeen: "2025-01-01T00:00:00Z",
Source: "agent",
@ -206,6 +229,9 @@ func TestHandleMonitoredSystemLedgerHTTP(t *testing.T) {
if decoded.Systems[0].StatusExplanation.Summary == "" {
t.Errorf("expected status explanation summary, got %+v", decoded.Systems[0].StatusExplanation)
}
if decoded.Systems[0].StatusExplanation.Reasons == nil {
t.Errorf("expected status explanation reasons, got %+v", decoded.Systems[0].StatusExplanation)
}
if decoded.Systems[0].Explanation.Summary == "" {
t.Errorf("expected explanation summary, got %+v", decoded.Systems[0].Explanation)
}

View file

@ -42,15 +42,35 @@ type MonitoredSystemGroupingSurface struct {
Source string
}
// MonitoredSystemStatusExplanation explains why Pulse chose the canonical
// monitored-system runtime status.
type MonitoredSystemStatusExplanation struct {
	// Summary is a one-line, human-readable rationale for the resolved status.
	Summary string
	// Reasons lists the degraded-status signals in deterministic order (most
	// severe first); empty when every included source reports online.
	Reasons []MonitoredSystemStatusReason
}

// MonitoredSystemStatusReason captures one canonical degraded-status signal
// that contributed to the monitored-system runtime status.
type MonitoredSystemStatusReason struct {
	Kind     string    // "source-<status>" or "surface-<status>"
	Name     string    // display name of the degraded resource
	Type     string    // resource type, e.g. "host"
	Source   string    // data source that degraded, e.g. "agent", "pbs"
	Status   string    // canonical degraded value: "stale", "offline", or "unknown"
	LastSeen time.Time // when the degraded source/surface last reported
	Summary  string    // human-readable sentence describing this reason
}
// MonitoredSystemRecord describes a counted top-level monitored system after
// canonical cross-view deduplication.
type MonitoredSystemRecord struct {
Name string
Type string
Status ResourceStatus
LastSeen time.Time
Source string
Explanation MonitoredSystemGroupingExplanation
Name string
Type string
Status ResourceStatus
StatusExplanation MonitoredSystemStatusExplanation
LastSeen time.Time
Source string
Explanation MonitoredSystemGroupingExplanation
}
// MonitoredSystemCount returns the number of top-level monitored systems after
@ -152,13 +172,15 @@ func resolveMonitoredSystemTopLevelSystems(rs ReadState) TopLevelSystemResolver
func monitoredSystemRecord(group monitoredSystemGroup) MonitoredSystemRecord {
resource := preferredMonitoredSystemResource(group.resources)
status := monitoredSystemStatus(group.resources)
record := MonitoredSystemRecord{
Name: monitoredSystemDisplayName(group.resources, resource),
Type: monitoredSystemType(resource),
Status: monitoredSystemStatus(group.resources),
LastSeen: monitoredSystemLastSeen(group.resources),
Source: monitoredSystemSource(group.resources),
Explanation: normalizeMonitoredSystemGroupingExplanation(group.explanation),
Name: monitoredSystemDisplayName(group.resources, resource),
Type: monitoredSystemType(resource),
Status: status,
StatusExplanation: monitoredSystemStatusExplanation(group.resources, status),
LastSeen: monitoredSystemLastSeen(group.resources),
Source: monitoredSystemSource(group.resources),
Explanation: normalizeMonitoredSystemGroupingExplanation(group.explanation),
}
if record.Name == "" {
record.Name = "Unnamed system"
@ -169,6 +191,10 @@ func monitoredSystemRecord(group monitoredSystemGroup) MonitoredSystemRecord {
if record.Status == "" {
record.Status = StatusUnknown
}
record.StatusExplanation = normalizeMonitoredSystemStatusExplanation(record.StatusExplanation)
if record.StatusExplanation.Summary == "" {
record.StatusExplanation.Summary = monitoredSystemStatusSummary(record.Status, record.StatusExplanation.Reasons)
}
if record.Source == "" {
record.Source = "unknown"
}
@ -190,6 +216,15 @@ func normalizeMonitoredSystemGroupingExplanation(
return explanation
}
// normalizeMonitoredSystemStatusExplanation replaces a nil reason slice with
// an empty one so downstream consumers always see a non-nil list.
func normalizeMonitoredSystemStatusExplanation(
	explanation MonitoredSystemStatusExplanation,
) MonitoredSystemStatusExplanation {
	if explanation.Reasons != nil {
		return explanation
	}
	explanation.Reasons = make([]MonitoredSystemStatusReason, 0)
	return explanation
}
func monitoredSystemStandaloneExplanation(resources []*Resource) MonitoredSystemGroupingExplanation {
surfaces := monitoredSystemGroupingSurfaces(resources)
resource := preferredMonitoredSystemResource(resources)
@ -412,6 +447,170 @@ func monitoredSystemStatus(resources []*Resource) ResourceStatus {
return best
}
// monitoredSystemStatusExplanation builds the canonical status explanation
// for a grouped monitored system: the ordered reason list derived from the
// grouped resources plus a summary matched to the resolved status.
func monitoredSystemStatusExplanation(
	resources []*Resource,
	status ResourceStatus,
) MonitoredSystemStatusExplanation {
	reasons := monitoredSystemStatusReasons(resources)
	explanation := MonitoredSystemStatusExplanation{
		Summary: monitoredSystemStatusSummary(status, reasons),
		Reasons: reasons,
	}
	return normalizeMonitoredSystemStatusExplanation(explanation)
}
// monitoredSystemStatusReasons collects degraded-status reasons from every
// grouped resource and orders them deterministically: by severity bucket
// (offline, stale, unknown, other), then name, type, source, last-seen, and
// finally summary text as the tiebreaker.
//
// Fixes: the trailing `if reasons == nil` check was dead code (`make` with a
// zero length never returns nil), and the comparator recomputed each reason's
// priority twice per branch; both priorities are now computed once.
func monitoredSystemStatusReasons(resources []*Resource) []MonitoredSystemStatusReason {
	reasons := make([]MonitoredSystemStatusReason, 0)
	for _, resource := range resources {
		reasons = append(reasons, monitoredSystemResourceStatusReasons(resource)...)
	}
	sort.Slice(reasons, func(i, j int) bool {
		a, b := reasons[i], reasons[j]
		pa, pb := monitoredSystemStatusReasonPriority(a), monitoredSystemStatusReasonPriority(b)
		if pa != pb {
			return pa < pb
		}
		if a.Name != b.Name {
			return a.Name < b.Name
		}
		if a.Type != b.Type {
			return a.Type < b.Type
		}
		if a.Source != b.Source {
			return a.Source < b.Source
		}
		if !a.LastSeen.Equal(b.LastSeen) {
			return a.LastSeen.Before(b.LastSeen)
		}
		return a.Summary < b.Summary
	})
	return reasons
}
// monitoredSystemResourceStatusReasons derives the degraded-status reasons
// contributed by a single grouped resource.
//
// Per-source freshness entries (resource.SourceStatus) take precedence: each
// non-online source produces a "source-<status>" reason. Only when no
// source-level entry is degraded does the resource's own surface status
// produce a fallback "surface-<status>" reason. Online signals never produce
// reasons, and a fully-online resource returns nil.
func monitoredSystemResourceStatusReasons(resource *Resource) []MonitoredSystemStatusReason {
	if resource == nil {
		return nil
	}
	name := monitoredSystemResourceDisplayName(resource)
	if name == "" {
		name = "Unnamed source"
	}
	resourceType := monitoredSystemType(resource)
	if resourceType == "" {
		resourceType = "system"
	}
	reasons := make([]MonitoredSystemStatusReason, 0)
	if len(resource.SourceStatus) > 0 {
		// Sort the map keys so the emitted reason order is deterministic.
		sourceKeys := make([]DataSource, 0, len(resource.SourceStatus))
		for source := range resource.SourceStatus {
			sourceKeys = append(sourceKeys, source)
		}
		sort.Slice(sourceKeys, func(i, j int) bool {
			return sourceKeys[i] < sourceKeys[j]
		})
		for _, source := range sourceKeys {
			sourceStatus := resource.SourceStatus[source]
			normalizedStatus := normalizeMonitoredSystemSourceStatus(sourceStatus.Status)
			if normalizedStatus == "online" {
				continue // healthy sources contribute no reasons
			}
			reasons = append(reasons, MonitoredSystemStatusReason{
				Kind: "source-" + normalizedStatus,
				Name: name,
				Type: resourceType,
				Source: string(source),
				Status: normalizedStatus,
				LastSeen: sourceStatus.LastSeen,
				Summary: monitoredSystemSourceStatusReasonSummary(name, source, normalizedStatus, sourceStatus.LastSeen),
			})
		}
	}
	if len(reasons) > 0 {
		return reasons
	}
	// No degraded source-level signal: fall back to the resource's own status.
	normalizedStatus := normalizeMonitoredSystemSourceStatus(string(resource.Status))
	if normalizedStatus == "online" {
		return nil
	}
	source := monitoredSystemPrimarySource(resource)
	if source == "" {
		source = "unknown"
	}
	return []MonitoredSystemStatusReason{
		{
			Kind: "surface-" + normalizedStatus,
			Name: name,
			Type: resourceType,
			Source: source,
			Status: normalizedStatus,
			LastSeen: resource.LastSeen,
			Summary: monitoredSystemSurfaceStatusReasonSummary(name, resourceType, source, normalizedStatus, resource.LastSeen),
		},
	}
}
// normalizeMonitoredSystemSourceStatus maps a raw source status string onto
// the canonical reason vocabulary (online/stale/offline/unknown). "warning"
// collapses into "stale"; anything unrecognized fails closed to "unknown".
func normalizeMonitoredSystemSourceStatus(status string) string {
	normalized := strings.ToLower(strings.TrimSpace(status))
	if normalized == "warning" {
		return "stale"
	}
	switch normalized {
	case "online", "stale", "offline":
		return normalized
	}
	return "unknown"
}
// monitoredSystemStatusSummary chooses the canonical one-line summary for the
// resolved status. For warning, the reason list is inspected so the copy
// names the dominant degraded signal (stale first, then offline, then a
// generic degraded message).
func monitoredSystemStatusSummary(status ResourceStatus, reasons []MonitoredSystemStatusReason) string {
	if status == StatusOnline {
		return "All included top-level collection paths currently report online status."
	}
	if status == StatusOffline {
		return "At least one included source is offline or disconnected, so Pulse marks this monitored system as offline."
	}
	if status == StatusWarning {
		if monitoredSystemHasReasonStatus(reasons, "stale") {
			return "At least one included source is stale, so Pulse marks this monitored system as warning."
		}
		if monitoredSystemHasReasonStatus(reasons, "offline") {
			return "At least one included source is offline or disconnected, but the canonical grouped status currently resolves to warning."
		}
		return "At least one included top-level collection path is degraded, so Pulse marks this monitored system as warning."
	}
	return "Pulse cannot determine a canonical runtime status for this monitored system yet."
}
// monitoredSystemHasReasonStatus reports whether any collected reason carries
// the given canonical status value.
func monitoredSystemHasReasonStatus(reasons []MonitoredSystemStatusReason, status string) bool {
	for i := range reasons {
		if reasons[i].Status == status {
			return true
		}
	}
	return false
}
// monitoredSystemStatusReasonPriority ranks reason statuses for sorting:
// offline (0) before stale (1) before unknown (2); everything else last (3).
func monitoredSystemStatusReasonPriority(reason MonitoredSystemStatusReason) int {
	if reason.Status == "offline" {
		return 0
	}
	if reason.Status == "stale" {
		return 1
	}
	if reason.Status == "unknown" {
		return 2
	}
	return 3
}
func monitoredSystemStatusPriority(status ResourceStatus) int {
switch status {
case StatusWarning:
@ -427,6 +626,63 @@ func monitoredSystemStatusPriority(status ResourceStatus) int {
}
}
// monitoredSystemSourceStatusReasonSummary renders the operator-facing
// sentence for a degraded per-source reason, e.g.
// "Agent data for Tower is stale (last reported 2026-03-23T11:55:00Z)."
func monitoredSystemSourceStatusReasonSummary(
	name string,
	source DataSource,
	status string,
	lastSeen time.Time,
) string {
	subject := name
	if strings.TrimSpace(subject) == "" {
		subject = "this monitored system"
	}
	var b strings.Builder
	b.WriteString(monitoredSystemStatusSourceLabel(string(source)))
	b.WriteString(" data for ")
	b.WriteString(subject)
	switch status {
	case "stale":
		b.WriteString(" is stale")
	case "offline":
		b.WriteString(" is offline or disconnected")
	default:
		b.WriteString(" does not report a canonical status yet")
	}
	if lastSeen.IsZero() {
		b.WriteString(".")
		return b.String()
	}
	b.WriteString(" (last reported ")
	b.WriteString(lastSeen.UTC().Format(time.RFC3339))
	b.WriteString(").")
	return b.String()
}
// monitoredSystemSurfaceStatusReasonSummary renders the operator-facing
// sentence for a fallback surface-level reason, naming the view's type, the
// reported status, the originating source, and (when known) when it last
// reported.
func monitoredSystemSurfaceStatusReasonSummary(
	name string,
	resourceType string,
	source string,
	status string,
	lastSeen time.Time,
) string {
	subject := name
	if strings.TrimSpace(subject) == "" {
		subject = "This monitored system"
	}
	reported := "unknown"
	switch status {
	case "stale":
		reported = "warning"
	case "offline":
		reported = "offline"
	}
	summary := monitoredSystemGroupingTypeLabel(resourceType) + " view for " + subject +
		" currently reports " + reported + " status from " + monitoredSystemStatusSourceLabel(source)
	if lastSeen.IsZero() {
		return summary + "."
	}
	return summary + " (last reported " + lastSeen.UTC().Format(time.RFC3339) + ")."
}
func monitoredSystemLastSeen(resources []*Resource) time.Time {
var lastSeen time.Time
for _, resource := range resources {
@ -488,6 +744,29 @@ func monitoredSystemPrimarySource(resource *Resource) string {
return ""
}
// monitoredSystemStatusSourceLabel maps a raw source identifier to its
// display label. Empty or "unknown" values become "Unknown source";
// unrecognized non-empty values pass through trimmed.
func monitoredSystemStatusSourceLabel(value string) string {
	trimmed := strings.TrimSpace(value)
	switch trimmed {
	case "", "unknown":
		return "Unknown source"
	case "agent":
		return "Agent"
	case "docker":
		return "Docker"
	case "kubernetes":
		return "Kubernetes"
	case "pbs":
		return "PBS"
	case "pmg":
		return "PMG"
	case "proxmox":
		return "Proxmox"
	case "truenas":
		return "TrueNAS"
	default:
		return trimmed
	}
}
func cloneStringSet(in map[string]struct{}) map[string]struct{} {
out := make(map[string]struct{}, len(in))
for key := range in {

View file

@ -261,6 +261,70 @@ func TestResourceRegistry_MonitoredSystemsSummarizeCanonicalTopLevelViews(t *tes
}
}
// TestMonitoredSystemsExplainsStaleGroupedSourceWhileLastSeenStaysFresh checks
// that a grouped system whose agent source went stale — while its docker
// source stayed fresh — resolves to warning, keeps the fresh aggregate
// last-seen, and surfaces exactly one "source-stale" reason that points at the
// agent source and carries that source's own last-seen timestamp.
func TestMonitoredSystemsExplainsStaleGroupedSourceWhileLastSeenStaysFresh(t *testing.T) {
	rr := NewRegistry(nil)
	now := time.Date(2026, 3, 23, 12, 0, 0, 0, time.UTC)
	// Agent last reported 5m ago (beyond the 60s staleness window below);
	// docker reported 10s ago (fresh).
	agentResource := topLevelTestAgent("agent-host", "tower.local", "machine-1", "agent-1")
	agentResource.LastSeen = now.Add(-5 * time.Minute)
	dockerResource := topLevelTestDockerHost("docker-host", "tower.local", "docker-runtime-1", "agent-1")
	dockerResource.LastSeen = now.Add(-10 * time.Second)
	rr.IngestRecords(SourceAgent, []IngestRecord{
		{
			SourceID: "agent-host",
			Resource: agentResource,
		},
	})
	rr.IngestRecords(SourceDocker, []IngestRecord{
		{
			SourceID: "docker-host",
			Resource: dockerResource,
		},
	})
	rr.MarkStale(now, map[DataSource]time.Duration{
		SourceAgent:  60 * time.Second,
		SourceDocker: 60 * time.Second,
	})
	systems := MonitoredSystems(rr)
	if len(systems) != 1 {
		t.Fatalf("MonitoredSystems() returned %d systems, want 1", len(systems))
	}
	system := systems[0]
	if system.Status != StatusWarning {
		t.Fatalf("expected grouped monitored system status warning, got %+v", system)
	}
	// Grouped last-seen follows the freshest source, not the stale one.
	if !system.LastSeen.Equal(dockerResource.LastSeen) {
		t.Fatalf("expected grouped last_seen %s, got %s", dockerResource.LastSeen, system.LastSeen)
	}
	if system.StatusExplanation.Summary == "" {
		t.Fatal("expected grouped monitored system status explanation summary")
	}
	if len(system.StatusExplanation.Reasons) != 1 {
		t.Fatalf("expected one stale grouped-source reason, got %+v", system.StatusExplanation.Reasons)
	}
	reason := system.StatusExplanation.Reasons[0]
	if reason.Kind != "source-stale" {
		t.Fatalf("expected stale source reason kind, got %+v", reason)
	}
	if reason.Source != string(SourceAgent) {
		t.Fatalf("expected agent source reason, got %+v", reason)
	}
	if reason.Status != "stale" {
		t.Fatalf("expected stale reason status, got %+v", reason)
	}
	// The reason keeps the stale source's own timestamp, not the aggregate.
	if !reason.LastSeen.Equal(agentResource.LastSeen) {
		t.Fatalf("expected stale reason last_seen %s, got %s", agentResource.LastSeen, reason.LastSeen)
	}
	if reason.Summary == "" {
		t.Fatalf("expected stale reason summary, got %+v", reason)
	}
}
func TestResourceRegistry_IngestRecords_UnknownSource(t *testing.T) {
rr := NewRegistry(nil)
now := time.Date(2026, 2, 20, 12, 0, 0, 0, time.UTC)