fix(alerts): make --disk-exclude suppress Proxmox SSD wear/health alerts (#1142)

The --disk-exclude agent flag only filtered local metric collection; it
had no effect on server-side Proxmox disk health and SSD wearout alerts,
which are driven by polling the Proxmox API directly. Users excluding
disks (e.g. --disk-exclude sda) therefore still received alerts for
those disks.

The agent now sends its DiskExclude patterns in each report. The server
stores them on the Host model and consults them during Proxmox disk
polling: excluded disks get a synthetic healthy status passed to
CheckDiskHealth, so any existing alerts clear immediately.

Also adds the FreeBSD pseudo-filesystem types (fdescfs, devfs,
linprocfs, linsysfs) to the virtual FS filter and /var/run/ to the
special mount prefix list, fixing false disk-full alerts for fdescfs
mounts on FreeBSD.
Author: rcourtman
Date:   2026-02-20 00:26:10 +00:00
Parent: bc378f0f60
Commit: 8c7d507ea4
6 changed files with 60 additions and 7 deletions

@@ -374,6 +374,7 @@ func (a *Agent) buildReport(ctx context.Context) (agentshost.Report, error) {
 			Hostname:        a.hostname,
 			UpdatedFrom:     a.updatedFrom,
 			CommandsEnabled: a.cfg.EnableCommands,
+			DiskExclude:     a.cfg.DiskExclude,
 		},
 		Host: agentshost.HostInfo{
 			ID: a.machineID,

@@ -199,6 +199,7 @@ type Host struct {
 	TokenLastUsedAt *time.Time `json:"tokenLastUsedAt,omitempty"`
 	Tags            []string   `json:"tags,omitempty"`
 	IsLegacy        bool       `json:"isLegacy,omitempty"`
+	DiskExclude     []string   `json:"diskExclude,omitempty"` // Agent's --disk-exclude patterns
 	// Linking: When this host agent is running on a known PVE node/VM/container
 	LinkedNodeID string `json:"linkedNodeId,omitempty"` // ID of the PVE node this agent is running on

@@ -2679,6 +2679,7 @@ func (m *Monitor) ApplyHostReport(report agentshost.Report, tokenRecord *config.
 		ReportIP:    strings.TrimSpace(report.Host.ReportIP),
 		Tags:        append([]string(nil), report.Tags...),
 		IsLegacy:    isLegacyHostAgent(report.Agent.Type),
+		DiskExclude: append([]string(nil), report.Agent.DiskExclude...),
 	}

 	// Apply any pending commands execution override from server config
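
The append([]string(nil), x...) idiom used for Tags and DiskExclude copies
the slice rather than aliasing the report's backing array, so a later
mutation of the report cannot change the stored Host. A minimal standalone
illustration:

package main

import "fmt"

func main() {
	src := []string{"sda", "nvme0n1"}
	cp := append([]string(nil), src...) // allocates a fresh backing array

	src[0] = "sdb" // mutate the original slice

	fmt.Println(src) // [sdb nvme0n1]
	fmt.Println(cp)  // [sda nvme0n1], the copy is unaffected
}
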
@@ -6278,6 +6279,23 @@ func (m *Monitor) pollPVEInstance(ctx context.Context, instanceName string, clie
 		}
 	}

+	// Build a map of node name -> disk exclusion patterns from linked host agents.
+	// This allows --disk-exclude on the agent to also suppress server-side
+	// Proxmox disk health/wearout alerts for the same disks.
+	diskExcludeByNode := make(map[string][]string)
+	hostByID := make(map[string]models.Host, len(currentState.Hosts))
+	for _, h := range currentState.Hosts {
+		hostByID[h.ID] = h
+	}
+	for _, n := range currentState.Nodes {
+		if n.LinkedHostAgentID == "" || n.Instance != inst {
+			continue
+		}
+		if linkedHost, ok := hostByID[n.LinkedHostAgentID]; ok && len(linkedHost.DiskExclude) > 0 && linkedHost.Status == "online" {
+			diskExcludeByNode[n.Name] = linkedHost.DiskExclude
+		}
+	}
+
 	var allDisks []models.PhysicalDisk
 	polledNodes := make(map[string]bool) // Track which nodes we successfully polled

@@ -6356,6 +6374,25 @@ func (m *Monitor) pollPVEInstance(ctx context.Context, instanceName string, clie
 				Int("wearout", disk.Wearout).
 				Msg("Checking disk health")

+			// If the linked host agent has --disk-exclude for this disk, pass a
+			// synthetic healthy disk to CheckDiskHealth so any existing alerts
+			// get cleared naturally, then skip the normal health/wearout checks.
+			if excludePatterns, ok := diskExcludeByNode[node.Node]; ok {
+				if fsfilters.MatchesDeviceExclude(disk.DevPath, excludePatterns) {
+					log.Debug().
+						Str("node", node.Node).
+						Str("disk", disk.DevPath).
+						Msg("Disk matches agent --disk-exclude, clearing any alerts")
+					// Synthetic healthy disk: health="PASSED", wearout=100 (full life).
+					// This causes CheckDiskHealth to clear both health and wearout alerts.
+					healthyDisk := disk
+					healthyDisk.Health = "PASSED"
+					healthyDisk.Wearout = 100
+					m.alertManager.CheckDiskHealth(inst, node.Node, healthyDisk)
+					continue
+				}
+			}
+
 			normalizedHealth := strings.ToUpper(strings.TrimSpace(disk.Health))
 			if normalizedHealth != "" && normalizedHealth != "UNKNOWN" && normalizedHealth != "PASSED" && normalizedHealth != "OK" {
 				// Disk has failed or is failing - alert manager will handle this
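
fsfilters.MatchesDeviceExclude itself is outside this diff. A hypothetical
sketch of the matching it is assumed to perform, with patterns like "sda"
or "nvme*" tried against both the device path and its base name:

package fsfilters

import "path"

// MatchesDeviceExclude: hypothetical sketch, not the implementation in
// this repository. Assumes glob-style patterns are matched against both
// the full device path ("/dev/sda") and its base name ("sda").
func MatchesDeviceExclude(devPath string, patterns []string) bool {
	base := path.Base(devPath)
	for _, pattern := range patterns {
		if ok, _ := path.Match(pattern, base); ok {
			return true
		}
		if ok, _ := path.Match(pattern, devPath); ok {
			return true
		}
	}
	return false
}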

@@ -20,13 +20,14 @@ type Report struct {

 // AgentInfo describes the reporting agent.
 type AgentInfo struct {
 	ID              string   `json:"id"`
 	Version         string   `json:"version,omitempty"`
 	Type            string   `json:"type,omitempty"` // "unified", "host", or "docker" - empty means legacy
 	IntervalSeconds int      `json:"intervalSeconds,omitempty"`
 	Hostname        string   `json:"hostname,omitempty"`
 	UpdatedFrom     string   `json:"updatedFrom,omitempty"` // Previous version if recently auto-updated
 	CommandsEnabled bool     `json:"commandsEnabled,omitempty"` // Whether AI command execution is enabled
+	DiskExclude     []string `json:"diskExclude,omitempty"` // Disk exclusion patterns from --disk-exclude flag
 }

 // HostInfo contains platform and identification details about the monitored host.
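
Given the tags above, an agent started with --disk-exclude sda would
report an agent section roughly like the following (all values other than
diskExclude are illustrative):

{
  "id": "a1b2c3",
  "type": "host",
  "intervalSeconds": 30,
  "hostname": "freebsd-box",
  "diskExclude": ["sda"]
}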

@@ -77,6 +77,10 @@ var virtualFSTypes = map[string]bool{
 	"overlay":   true, // Docker/container overlay filesystems (issue #942)
 	"overlayfs": true, // Alternative overlay name
 	"autofs":    true, // Systemd automount placeholders (issue #942)
+	"fdescfs":   true, // FreeBSD file descriptor filesystem (issue #1142)
+	"devfs":     true, // FreeBSD device filesystem
+	"linprocfs": true, // FreeBSD Linux proc compatibility
+	"linsysfs":  true, // FreeBSD Linux sys compatibility
 }

 // networkFSPatterns are substrings that indicate network/remote filesystems.
// networkFSPatterns are substrings that indicate network/remote filesystems. // networkFSPatterns are substrings that indicate network/remote filesystems.
@@ -88,6 +92,7 @@ var specialMountPrefixes = []string{
 	"/proc",
 	"/sys",
 	"/run",
+	"/var/run/", // FreeBSD (not a symlink to /run like on Linux)
 	"/var/lib/containers",
 	"/snap",
 }
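
The helper that applies specialMountPrefixes is not part of this diff.
Assuming plain strings.HasPrefix semantics, the trailing slash on
"/var/run/" is what lets "/var/run/samba/fd" match while "/var/runtime"
does not, exactly the distinction the new test cases assert. A minimal
sketch under that assumption:

package fsfilters

import "strings"

// Hypothetical sketch: the real helper is not shown in this diff, and
// plain strings.HasPrefix semantics are assumed here.
var specialMountPrefixes = []string{
	"/proc", "/sys", "/run", "/var/run/", "/var/lib/containers", "/snap",
}

func hasSpecialMountPrefix(mountpoint string) bool {
	for _, prefix := range specialMountPrefixes {
		// "/var/run/" matches "/var/run/samba/fd" but not "/var/runtime".
		if strings.HasPrefix(mountpoint, prefix) {
			return true
		}
	}
	return false
}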

@@ -193,6 +193,14 @@ func TestShouldSkipFilesystem(t *testing.T) {
 		{"Windows C drive - should NOT skip", "NTFS", "C:\\", 500 * 1024 * 1024 * 1024, 200 * 1024 * 1024 * 1024, false},
 		{"Windows D drive - should NOT skip", "NTFS", "D:\\", 1000 * 1024 * 1024 * 1024, 500 * 1024 * 1024 * 1024, false},

+		// FreeBSD pseudo filesystems (issue #1142)
+		{"FreeBSD fdescfs", "fdescfs", "/var/run/samba/fd", 1024, 1024, true},
+		{"FreeBSD devfs", "devfs", "/dev", 1024, 100, true},
+		{"FreeBSD linprocfs", "linprocfs", "/compat/linux/proc", 0, 0, true},
+		{"FreeBSD linsysfs", "linsysfs", "/compat/linux/sys", 0, 0, true},
+		{"/var/run/ prefix FreeBSD", "ufs", "/var/run/something", 1024, 100, true},
+		{"/var/runtime should NOT skip", "ufs", "/var/runtime", 1000000, 500000, false},
+
 		// Regular filesystems that should NOT be skipped
 		{"ext4 root", "ext4", "/", 100 * 1024 * 1024 * 1024, 50 * 1024 * 1024 * 1024, false},
 		{"xfs data", "xfs", "/data", 500 * 1024 * 1024 * 1024, 200 * 1024 * 1024 * 1024, false},