feat: show pending apt updates for Proxmox nodes (#1083)

- Add PendingUpdates and PendingUpdatesCheckedAt fields to Node model
- Add GetNodePendingUpdates method to Proxmox client (calls /nodes/{node}/apt/update)
- Add 30-minute polling cache to avoid excessive API calls
- Add pendingUpdates to frontend Node type
- Add color-coded badge in NodeSummaryTable (yellow: 1-9, orange: 10+)
- Update test stubs for interface compliance

Requires Sys.Audit permission on Proxmox API token to read apt updates.
This commit is contained in:
rcourtman 2026-01-21 10:22:50 +00:00
parent 510844ef65
commit ebc29b4fdb
11 changed files with 170 additions and 194 deletions

View file

@ -45,8 +45,7 @@ import { ReportingPanel } from './ReportingPanel';
import {
PveNodesTable,
PbsNodesTable,
PmgNodesTable,
type TemperatureTransportInfo,
PmgNodesTable
} from './ConfiguredNodeTables';
import { SettingsSectionNav } from './SettingsSectionNav';
import { SettingsAPI } from '@/api/settings';
@ -140,58 +139,6 @@ interface SystemDiagnostic {
memoryMB: number;
}
interface TemperatureProxyHTTPStatus {
node: string;
url?: string;
reachable: boolean;
error?: string;
}
interface TemperatureProxyControlPlaneState {
instance: string;
lastSync?: string;
refreshIntervalSeconds?: number;
secondsBehind?: number;
status?: string;
}
interface TemperatureProxySocketHost {
node?: string;
host?: string;
cooldownUntil?: string;
secondsRemaining?: number;
lastError?: string;
}
type TemperatureSocketCooldownInfo = {
secondsRemaining?: number;
until?: string;
lastError?: string;
};
// HostProxySummary removed - pulse-sensor-proxy is deprecated in v5
interface TemperatureProxyDiagnostic {
legacySSHDetected: boolean;
recommendProxyUpgrade: boolean;
socketFound: boolean;
socketPath?: string;
socketPermissions?: string;
socketOwner?: string;
socketGroup?: string;
proxyReachable?: boolean;
proxyVersion?: string;
proxyPublicKeySha256?: string;
proxySshDirectory?: string;
legacySshKeyCount?: number;
proxyCapabilities?: string[];
notes?: string[];
httpProxies?: TemperatureProxyHTTPStatus[];
controlPlaneEnabled?: boolean;
controlPlaneStates?: TemperatureProxyControlPlaneState[];
socketHostCooldowns?: TemperatureProxySocketHost[];
}
interface APITokenSummary {
id: string;
name: string;
@ -278,7 +225,6 @@ interface DiagnosticsData {
nodes: DiagnosticsNode[];
pbs: DiagnosticsPBS[];
system: SystemDiagnostic;
temperatureProxy?: TemperatureProxyDiagnostic | null;
apiTokens?: APITokenDiagnostic | null;
dockerAgents?: DockerAgentDiagnostic | null;
alerts?: AlertsDiagnostic | null;
@ -659,7 +605,6 @@ const Settings: Component<SettingsProps> = (props) => {
const [envOverrides, setEnvOverrides] = createSignal<Record<string, boolean>>({});
const [temperatureMonitoringEnabled, setTemperatureMonitoringEnabled] = createSignal(true);
const [savingTemperatureSetting, setSavingTemperatureSetting] = createSignal(false);
// hostProxyStatus removed - pulse-sensor-proxy is deprecated in v5
const [hideLocalLogin, setHideLocalLogin] = createSignal(false);
const [savingHideLocalLogin, setSavingHideLocalLogin] = createSignal(false);
@ -876,7 +821,7 @@ const Settings: Component<SettingsProps> = (props) => {
};
// Diagnostics
const [diagnosticsData, setDiagnosticsData] = createSignal<DiagnosticsData | null>(null);
const [_diagnosticsData, setDiagnosticsData] = createSignal<DiagnosticsData | null>(null);
const [_runningDiagnostics, setRunningDiagnostics] = createSignal(false);
// Security
@ -926,112 +871,12 @@ const Settings: Component<SettingsProps> = (props) => {
};
const normalizeHostKey = (value?: string | null) => {
if (!value) {
return '';
}
let result = value.trim().toLowerCase();
if (!result) {
return '';
}
result = result.replace(/^https?:\/\//, '');
const slashIndex = result.indexOf('/');
if (slashIndex !== -1) {
result = result.slice(0, slashIndex);
}
const colonIndex = result.indexOf(':');
if (colonIndex !== -1) {
result = result.slice(0, colonIndex);
}
return result;
};
const emitTemperatureProxyWarnings = (diag: DiagnosticsData | null) => {
if (!diag?.temperatureProxy) {
return;
}
if (diag.temperatureProxy.httpProxies) {
const failing = (diag.temperatureProxy.httpProxies as TemperatureProxyHTTPStatus[]).filter(
(proxy) => proxy && proxy.node && !proxy.reachable,
);
if (failing.length > 0) {
const nodes = failing.map((proxy) => proxy.node || 'Unknown').join(', ');
notificationStore.warning(`Pulse cannot reach HTTPS temperature proxy on: ${nodes}`);
}
}
if (diag.temperatureProxy.controlPlaneStates) {
const stale = (diag.temperatureProxy.controlPlaneStates as TemperatureProxyControlPlaneState[]).filter(
(state) => state && (state.status === 'stale' || state.status === 'offline'),
);
if (stale.length > 0) {
const names = stale.map((state) => state.instance || 'Proxy').join(', ');
notificationStore.warning(`Temperature proxy control plane is behind on: ${names}`);
}
}
if (diag.temperatureProxy.socketHostCooldowns) {
const cooling = (diag.temperatureProxy.socketHostCooldowns as TemperatureProxySocketHost[]).filter(
(entry) => entry && (entry.node || entry.host),
);
if (cooling.length > 0) {
const hosts = cooling.map((entry) => entry.node || entry.host || 'proxy').join(', ');
notificationStore.warning(`Temperature proxy is cooling down the following hosts: ${hosts}`);
}
}
};
const temperatureTransportInfo = createMemo<TemperatureTransportInfo | null>(() => {
const diag = diagnosticsData();
if (!diag?.temperatureProxy) {
return null;
}
const httpMap: TemperatureTransportInfo['httpMap'] = {};
const proxies = diag.temperatureProxy.httpProxies || [];
proxies.forEach((proxy) => {
if (!proxy || !proxy.node) {
return;
}
const key = proxy.node.trim().toLowerCase();
if (!key) {
return;
}
httpMap[key] = {
reachable: Boolean(proxy.reachable),
error: proxy.error || undefined,
url: proxy.url || undefined,
};
});
const socketStatus: TemperatureTransportInfo['socketStatus'] =
diag.temperatureProxy.socketFound && diag.temperatureProxy.proxyReachable
? 'healthy'
: diag.temperatureProxy.socketFound
? 'error'
: 'missing';
const cooldowns: Record<string, TemperatureSocketCooldownInfo> = {};
const socketHosts = diag.temperatureProxy.socketHostCooldowns || [];
(socketHosts as TemperatureProxySocketHost[]).forEach((entry) => {
const key = normalizeHostKey(entry.node) || normalizeHostKey(entry.host);
if (!key) {
return;
}
cooldowns[key] = {
secondsRemaining: entry.secondsRemaining,
until: entry.cooldownUntil,
lastError: entry.lastError || undefined,
};
});
return { httpMap, socketStatus, socketCooldowns: cooldowns };
});
const runDiagnostics = async () => {
setRunningDiagnostics(true);
try {
const response = await apiFetch('/api/diagnostics');
const diag = await response.json();
setDiagnosticsData(diag);
emitTemperatureProxyWarnings(diag);
// hostProxyStatus removed - pulse-sensor-proxy is deprecated in v5
} catch (err) {
logger.error('Failed to fetch diagnostics', err);
notificationStore.error('Failed to run diagnostics');
@ -1040,8 +885,6 @@ const Settings: Component<SettingsProps> = (props) => {
}
};
// refreshHostProxyStatus removed - pulse-sensor-proxy is deprecated in v5
createEffect(() => {
if (typeof window === 'undefined') {
return;
@ -2730,7 +2573,6 @@ const Settings: Component<SettingsProps> = (props) => {
stateNodes={state.nodes ?? []}
stateHosts={state.hosts ?? []}
globalTemperatureMonitoringEnabled={temperatureMonitoringEnabled()}
temperatureTransports={temperatureTransportInfo()}
onTestConnection={testNodeConnection}
onEdit={(node) => {
setEditingNode(node);

View file

@ -545,6 +545,17 @@ export const NodeSummaryTable: Component<NodeSummaryTableProps> = (props) => {
+Agent
</span>
</Show>
<Show when={isPVEItem && online && node!.pendingUpdates !== undefined && node!.pendingUpdates > 0}>
<span
class={`text-[9px] px-1 py-0 rounded font-medium whitespace-nowrap ${(node!.pendingUpdates ?? 0) >= 10
? 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400'
: 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400'
}`}
title={`${node!.pendingUpdates} pending apt update${node!.pendingUpdates !== 1 ? 's' : ''}`}
>
{node!.pendingUpdates} updates
</span>
</Show>
<Show when={isPBSItem}>
<span class="text-[9px] px-1 py-0 rounded font-medium bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400">
PBS

View file

@ -148,6 +148,8 @@ export interface Node {
cpuInfo: CPUInfo;
temperature?: Temperature; // CPU/NVMe temperatures
temperatureMonitoringEnabled?: boolean | null; // Per-node temperature monitoring override
pendingUpdates?: number; // Number of pending apt updates
pendingUpdatesCheckedAt?: string; // When updates were last checked
lastSeen: string;
connectionHealth: string;
isClusterMember?: boolean; // True if part of a cluster

View file

@ -89,6 +89,10 @@ type Node struct {
IsClusterMember bool `json:"isClusterMember"` // True if part of a cluster
ClusterName string `json:"clusterName"` // Name of cluster (empty if standalone)
// Package updates - polled less frequently (every 30 mins)
PendingUpdates int `json:"pendingUpdates"` // Number of pending apt updates
PendingUpdatesCheckedAt time.Time `json:"pendingUpdatesCheckedAt,omitempty"` // When updates were last checked
// Linking: When a host agent is running on this PVE node, link them together
LinkedHostAgentID string `json:"linkedHostAgentId,omitempty"` // ID of the host agent running on this node
}
@ -1061,7 +1065,7 @@ type Temperature struct {
MaxRecorded time.Time `json:"maxRecorded,omitempty"` // When maximum temperature was recorded
Cores []CoreTemp `json:"cores,omitempty"` // Individual core temperatures
GPU []GPUTemp `json:"gpu,omitempty"` // GPU temperatures
NVMe []NVMeTemp `json:"nvme,omitempty"` // NVMe drive temperatures (legacy, from sensor proxy)
NVMe []NVMeTemp `json:"nvme,omitempty"` // NVMe drive temperatures
SMART []DiskTemp `json:"smart,omitempty"` // Physical disk temperatures from SMART data
Available bool `json:"available"` // Whether any temperature data is available
HasCPU bool `json:"hasCPU"` // Whether CPU temperature data is available

View file

@ -85,6 +85,7 @@ type PVEClientInterface interface {
GetZFSPoolStatus(ctx context.Context, node string) ([]proxmox.ZFSPoolStatus, error)
GetZFSPoolsWithDetails(ctx context.Context, node string) ([]proxmox.ZFSPoolInfo, error)
GetDisks(ctx context.Context, node string) ([]proxmox.Disk, error)
GetNodePendingUpdates(ctx context.Context, node string) ([]proxmox.AptPackage, error)
GetCephStatus(ctx context.Context) (*proxmox.CephStatus, error)
GetCephDF(ctx context.Context) (*proxmox.CephDF, error)
}
@ -770,8 +771,9 @@ type Monitor struct {
instanceInfoCache map[string]*instanceInfo
pollStatusMap map[string]*pollStatus
dlqInsightMap map[string]*dlqInsight
nodeLastOnline map[string]time.Time // Track last time each node was seen online (for grace period)
resourceStore ResourceStoreInterface // Optional unified resource store for polling optimization
nodeLastOnline map[string]time.Time // Track last time each node was seen online (for grace period)
nodePendingUpdatesCache map[string]pendingUpdatesCache // Cache pending updates per node (checked every 30 min)
resourceStore ResourceStoreInterface // Optional unified resource store for polling optimization
mockMetricsCancel context.CancelFunc
mockMetricsWg sync.WaitGroup
dockerChecker DockerChecker // Optional Docker checker for LXC containers
@ -787,6 +789,15 @@ type rrdMemCacheEntry struct {
fetchedAt time.Time
}
// pendingUpdatesCache caches apt pending updates count per node.
// Entries are stored in Monitor.nodePendingUpdatesCache keyed by node ID and
// consulted during node polling so the apt endpoint is not hit on every cycle.
type pendingUpdatesCache struct {
	count     int       // number of pending apt packages at the last successful check
	checkedAt time.Time // when the last successful check ran; used to decide TTL expiry
}

// TTL for pending updates cache (30 minutes - balance between freshness and API load)
const pendingUpdatesCacheTTL = 30 * time.Minute
// agentProfileCacheEntry caches agent profiles and assignments to avoid disk I/O on every agent report.
// TTL is 60 seconds to balance freshness with performance.
type agentProfileCacheEntry struct {
@ -3388,13 +3399,6 @@ func New(cfg *config.Config) (*Monitor, error) {
// Security warning if running in container with SSH temperature monitoring
checkContainerizedTempMonitoring()
if cfg.TemperatureMonitoringEnabled {
isContainer := os.Getenv("PULSE_DOCKER") == "true" || system.InContainer()
if isContainer && tempCollector != nil && !tempCollector.SocketProxyAvailable() {
log.Warn().Msg("Temperature monitoring is enabled but the container does not have access to pulse-sensor-proxy. Install the proxy on the host or disable temperatures until it is available.")
}
}
stalenessTracker := NewStalenessTracker(getPollMetrics())
stalenessTracker.SetBounds(cfg.AdaptivePollingBaseInterval, cfg.AdaptivePollingMaxInterval)
taskQueue := NewTaskQueue()
@ -3546,6 +3550,7 @@ func New(cfg *config.Config) (*Monitor, error) {
pollStatusMap: make(map[string]*pollStatus),
dlqInsightMap: make(map[string]*dlqInsight),
nodeLastOnline: make(map[string]time.Time),
nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
}
m.breakerBaseRetry = 5 * time.Second

View file

@ -132,6 +132,10 @@ func (s *stubPVEClient) GetCephDF(ctx context.Context) (*proxmox.CephDF, error)
return nil, nil
}
// GetNodePendingUpdates satisfies PVEClientInterface for tests; the stub
// reports no pending updates and no error.
func (s *stubPVEClient) GetNodePendingUpdates(ctx context.Context, node string) ([]proxmox.AptPackage, error) {
	return nil, nil
}
func floatPtr(v float64) *float64 { return &v }
func TestPollPVEInstanceUsesRRDMemUsedFallback(t *testing.T) {

View file

@ -2205,7 +2205,7 @@ func (m *Monitor) pollPVENode(
}
// If no host agent temp or we need additional data (SMART), try SSH/proxy collection
var proxyTemp *models.Temperature
var sshTemp *models.Temperature
var err error
if m.tempCollector != nil {
// Temperature collection is best-effort - use a short timeout to avoid blocking node polling
@ -2248,15 +2248,11 @@ func (m *Monitor) pollPVENode(
sshHost = node.Node
}
// Skip SSH/proxy collection if we already have host agent data and no proxy is configured
// (proxy might provide additional SMART data that host agent doesn't have)
skipProxyCollection := hostAgentTemp != nil &&
strings.TrimSpace(instanceCfg.TemperatureProxyURL) == "" &&
!m.HasSocketTemperatureProxy()
// Skip SSH collection if we already have host agent data.
skipSSHCollection := hostAgentTemp != nil
if !skipProxyCollection {
// Use HTTP proxy if configured for this instance, otherwise fall back to socket/SSH
proxyTemp, err = m.tempCollector.CollectTemperatureWithProxy(tempCtx, sshHost, node.Node, instanceCfg.TemperatureProxyURL, instanceCfg.TemperatureProxyToken)
if !skipSSHCollection {
sshTemp, err = m.tempCollector.CollectTemperature(tempCtx, sshHost, node.Node)
if err != nil && hostAgentTemp == nil {
log.Debug().
Str("node", node.Node).
@ -2267,25 +2263,25 @@ func (m *Monitor) pollPVENode(
}
}
// Debug: log proxy temp details before merge
if proxyTemp != nil {
// Debug: log SSH temp details before merge
if sshTemp != nil {
log.Debug().
Str("node", node.Node).
Bool("proxyTempAvailable", proxyTemp.Available).
Bool("proxyHasSMART", proxyTemp.HasSMART).
Int("proxySMARTCount", len(proxyTemp.SMART)).
Bool("proxyHasNVMe", proxyTemp.HasNVMe).
Int("proxyNVMeCount", len(proxyTemp.NVMe)).
Msg("Proxy temperature data before merge")
Bool("sshTempAvailable", sshTemp.Available).
Bool("sshHasSMART", sshTemp.HasSMART).
Int("sshSMARTCount", len(sshTemp.SMART)).
Bool("sshHasNVMe", sshTemp.HasNVMe).
Int("sshNVMeCount", len(sshTemp.NVMe)).
Msg("SSH temperature data before merge")
} else {
log.Debug().
Str("node", node.Node).
Msg("Proxy temperature data is nil")
Msg("SSH temperature data is nil")
}
}
// Merge host agent and proxy temperatures
temp := mergeTemperatureData(hostAgentTemp, proxyTemp)
// Merge host agent and SSH temperatures
temp := mergeTemperatureData(hostAgentTemp, sshTemp)
if temp != nil && temp.Available {
// Get the current CPU temperature (prefer package, fall back to max)
@ -2333,11 +2329,11 @@ func (m *Monitor) pollPVENode(
modelNode.Temperature = temp
// Determine source for logging
tempSource := "proxy/ssh"
if hostAgentTemp != nil && proxyTemp == nil {
tempSource := "ssh"
if hostAgentTemp != nil && sshTemp == nil {
tempSource = "host-agent"
} else if hostAgentTemp != nil && proxyTemp != nil {
tempSource = "host-agent+proxy"
} else if hostAgentTemp != nil && sshTemp != nil {
tempSource = "host-agent+ssh"
}
log.Debug().
@ -2381,6 +2377,54 @@ func (m *Monitor) pollPVENode(
}
}
// Poll pending apt updates (less frequently - every 30 minutes)
// Only for online nodes to avoid wasting API calls on offline nodes
if effectiveStatus == "online" {
now := time.Now()
m.mu.RLock()
cached, hasCached := m.nodePendingUpdatesCache[nodeID]
m.mu.RUnlock()
if !hasCached || now.Sub(cached.checkedAt) >= pendingUpdatesCacheTTL {
// Time to check for updates
pendingPkgs, err := client.GetNodePendingUpdates(ctx, node.Node)
if err != nil {
// API call failed - preserve cached value if available, don't spam logs
log.Debug().
Err(err).
Str("node", node.Node).
Str("instance", instanceName).
Msg("Could not check pending apt updates (may require Sys.Audit permission)")
if hasCached {
modelNode.PendingUpdates = cached.count
modelNode.PendingUpdatesCheckedAt = cached.checkedAt
}
} else {
updateCount := len(pendingPkgs)
modelNode.PendingUpdates = updateCount
modelNode.PendingUpdatesCheckedAt = now
// Cache the result
m.mu.Lock()
m.nodePendingUpdatesCache[nodeID] = pendingUpdatesCache{
count: updateCount,
checkedAt: now,
}
m.mu.Unlock()
log.Debug().
Str("node", node.Node).
Str("instance", instanceName).
Int("pendingUpdates", updateCount).
Msg("Checked pending apt updates")
}
} else {
// Use cached value
modelNode.PendingUpdates = cached.count
modelNode.PendingUpdatesCheckedAt = cached.checkedAt
}
}
if m.pollMetrics != nil {
nodeNameLabel := strings.TrimSpace(node.Node)
if nodeNameLabel == "" {

View file

@ -96,6 +96,9 @@ func (f fakeSnapshotClient) GetCephStatus(ctx context.Context) (*proxmox.CephSta
return nil, nil
}
func (f fakeSnapshotClient) GetCephDF(ctx context.Context) (*proxmox.CephDF, error) { return nil, nil }
// GetNodePendingUpdates satisfies PVEClientInterface for snapshot tests; it
// reports no pending updates and no error.
func (f fakeSnapshotClient) GetNodePendingUpdates(ctx context.Context, node string) ([]proxmox.AptPackage, error) {
	return nil, nil
}
func TestCollectSnapshotSizes(t *testing.T) {
m := &Monitor{}

View file

@ -132,6 +132,10 @@ func (f *fakeStorageClient) GetCephDF(ctx context.Context) (*proxmox.CephDF, err
return nil, nil
}
// GetNodePendingUpdates satisfies PVEClientInterface for storage tests; it
// reports no pending updates and no error.
func (f *fakeStorageClient) GetNodePendingUpdates(ctx context.Context, node string) ([]proxmox.AptPackage, error) {
	return nil, nil
}
func TestPollStorageWithNodesOptimizedRecordsMetricsAndAlerts(t *testing.T) {
t.Setenv("PULSE_DATA_DIR", t.TempDir())

View file

@ -2003,3 +2003,34 @@ func (c *Client) GetDisks(ctx context.Context, node string) ([]Disk, error) {
return result.Data, nil
}
// AptPackage represents a pending package update from apt.
// The JSON tags use capitalized field names because that is how the Proxmox
// /nodes/{node}/apt/update endpoint returns them — do not change them to
// lowercase. Note that the available version arrives under "Version", which
// is mapped to NewVersion here for clarity.
type AptPackage struct {
	Package     string `json:"Package"`     // Package name
	Title       string `json:"Title"`       // Human-readable title
	Description string `json:"Description"` // Package description
	OldVersion  string `json:"OldVersion"`  // Currently installed version
	NewVersion  string `json:"Version"`     // Available version (API field is "Version")
	Priority    string `json:"Priority"`    // Update priority (e.g., "important", "optional")
	Section     string `json:"Section"`     // Package section
	Origin      string `json:"Origin"`      // Repository origin
}
// GetNodePendingUpdates returns the list of pending apt updates for a node.
// Requires Sys.Audit permission on /nodes/{node}; without it the API call
// fails and the error is returned to the caller.
func (c *Client) GetNodePendingUpdates(ctx context.Context, node string) ([]AptPackage, error) {
	path := fmt.Sprintf("/nodes/%s/apt/update", node)
	resp, err := c.get(ctx, path)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Proxmox wraps every response payload in a {"data": ...} envelope.
	var payload struct {
		Data []AptPackage `json:"data"`
	}
	if decodeErr := json.NewDecoder(resp.Body).Decode(&payload); decodeErr != nil {
		return nil, decodeErr
	}
	return payload.Data, nil
}

View file

@ -1360,6 +1360,32 @@ func (cc *ClusterClient) GetDisks(ctx context.Context, node string) ([]Disk, err
return result, err
}
// GetNodePendingUpdates returns pending apt updates for a node with failover support.
// Connectivity and permission failures are downgraded to an empty list so that a
// missing Sys.Audit permission does not surface as a polling error.
func (cc *ClusterClient) GetNodePendingUpdates(ctx context.Context, node string) ([]AptPackage, error) {
	var pkgs []AptPackage
	err := cc.executeWithFailover(ctx, func(client *Client) error {
		list, callErr := client.GetNodePendingUpdates(ctx, node)
		if callErr != nil {
			return callErr
		}
		pkgs = list
		return nil
	})
	if err == nil {
		return pkgs, nil
	}
	// Don't return error for transient connectivity issues or permission issues.
	msg := err.Error()
	if strings.Contains(msg, "no healthy nodes available") ||
		strings.Contains(msg, "403") ||
		strings.Contains(msg, "permission") {
		log.Debug().
			Str("cluster", cc.name).
			Str("node", node).
			Err(err).
			Msg("Could not get pending updates - returning empty list")
		return []AptPackage{}, nil
	}
	return pkgs, err
}
// GetClusterStatus returns the cluster status including all nodes with failover support.
func (cc *ClusterClient) GetClusterStatus(ctx context.Context) ([]ClusterStatus, error) {
var result []ClusterStatus