package hostmetrics

import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	agentshost "github.com/rcourtman/pulse-go-rewrite/pkg/agents/host"
	"github.com/rs/zerolog/log"
)

const maxZpoolCommandOutputSize = 1 << 20 // 1 MiB

var errZpoolCommandOutputTooLarge = errors.New("zpool command output exceeded limit")

// zpoolStats represents capacity data reported by `zpool list`.
type zpoolStats struct {
	Size  uint64
	Alloc uint64
	Free  uint64
}

// zfsDatasetUsage preserves per-dataset usage so we can reconcile pools later.
type zfsDatasetUsage struct {
	Pool       string
	Dataset    string
	Mountpoint string
	Total      uint64
	Used       uint64
	Free       uint64
}

// Package-level indirections so alternate implementations (for example, test
// stubs) can be substituted.
var queryZpoolStats = fetchZpoolStats
var zpoolLookPath = exec.LookPath
var zpoolStat = os.Stat
var zpoolCommandRunner = runZpoolCommand

// limitedBuffer wraps a bytes.Buffer and refuses writes beyond maxBytes,
// recording that the limit was exceeded.
type limitedBuffer struct {
	buf      bytes.Buffer
	maxBytes int
	exceeded bool
}

func (b *limitedBuffer) Write(p []byte) (int, error) {
	remaining := b.maxBytes - b.buf.Len()
	if remaining <= 0 {
		b.exceeded = true
		return 0, errZpoolCommandOutputTooLarge
	}
	if len(p) > remaining {
		b.exceeded = true
		written, _ := b.buf.Write(p[:remaining])
		return written, errZpoolCommandOutputTooLarge
	}
	return b.buf.Write(p)
}
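
// A minimal illustration (hypothetical values): with maxBytes = 4, writing
// []byte("hello") stores "hell", returns (4, errZpoolCommandOutputTooLarge),
// and sets exceeded, letting callers distinguish truncation from other errors.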

func (b *limitedBuffer) Bytes() []byte {
	return b.buf.Bytes()
}

// summarizeZFSPools converts per-dataset usage into one Disk entry per ZFS
// pool, preferring `zpool list` capacity data and falling back to the
// datasets' own statfs numbers when zpool stats are unavailable.
func summarizeZFSPools(ctx context.Context, datasets []zfsDatasetUsage) []agentshost.Disk {
	if len(datasets) == 0 {
		log.Debug().Msg("zfs: no datasets to summarize")
		return nil
	}

	pools := uniqueZFSPools(datasets)
	if len(pools) == 0 {
		log.Debug().Msg("zfs: no unique pools found from datasets")
		return nil
	}
	log.Debug().Int("datasetCount", len(datasets)).Strs("pools", pools).Msg("zfs: summarizing pools")

	bestDatasets := bestZFSPoolDatasets(datasets)
	mountpoints := bestZFSMountpoints(datasets)
	for pool, ds := range bestDatasets {
		log.Debug().Str("pool", pool).Str("dataset", ds.Dataset).Str("mount", ds.Mountpoint).Uint64("total", ds.Total).Uint64("used", ds.Used).Msg("zfs: best dataset for pool")
	}

	stats, err := queryZpoolStats(ctx, pools)
	if err == nil && len(stats) > 0 {
		log.Debug().Int("zpoolStatsCount", len(stats)).Msg("zfs: using zpool stats")
		return disksFromZpoolStats(pools, stats, mountpoints, bestDatasets)
	}

	if err != nil {
		log.Debug().Err(err).Msg("zfs: zpool stats unavailable, using fallback")
	} else {
		log.Debug().Msg("zfs: zpool stats query returned no data, using fallback")
	}
	return fallbackZFSDisks(bestDatasets, mountpoints)
}
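
// Illustrative flow (hypothetical pools): given datasets "tank", "tank/data",
// and "rpool", uniqueZFSPools yields ["rpool", "tank"]; if `zpool list`
// succeeds, both pools get zpool-derived capacity, otherwise each falls back
// to its largest dataset's statfs numbers via fallbackZFSDisks.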

// newZFSDisk constructs a Disk entry for a ZFS pool with computed usage.
func newZFSDisk(pool, mountpoint string, total, used, free uint64) agentshost.Disk {
	return agentshost.Disk{
		Device:     pool,
		Mountpoint: mountpoint,
		Filesystem: "zfs",
		Type:       "zfs",
		TotalBytes: int64(total),
		UsedBytes:  int64(used),
		FreeBytes:  int64(free),
		Usage:      clampPercent(calculatePercent(total, used)),
	}
}

// disksFromZpoolStats builds Disk entries from zpool capacity stats,
// reconciling them with per-dataset statfs data where possible.
func disksFromZpoolStats(
	pools []string,
	stats map[string]zpoolStats,
	mountpoints map[string]string,
	bestDatasets map[string]zfsDatasetUsage,
) []agentshost.Disk {
	disks := make([]agentshost.Disk, 0, len(pools))

	for _, pool := range pools {
		stat, ok := stats[pool]
		mp := mountpoints[pool]
		if mp == "" {
			mp = fmt.Sprintf("zpool:%s", pool)
		}

		ds := bestDatasets[pool]
		log.Debug().Str("pool", pool).Bool("hasZpoolStats", ok).Uint64("zpoolSize", stat.Size).Uint64("zpoolAlloc", stat.Alloc).Uint64("zpoolFree", stat.Free).Uint64("dsTotal", ds.Total).Uint64("dsUsed", ds.Used).Str("mount", mp).Msg("zfs: processing pool")

		if ok && stat.Size > 0 {
			// Compute pool-level usable capacity by combining zpool stats with
			// dataset stats. ZFS statfs on a dataset returns per-dataset Used
			// (missing zvols and other datasets), but its Free reflects real
			// pool-available space. We use the ratio ds.Free/stat.Free to
			// convert the raw zpool Size to usable capacity. This handles
			// RAIDZ (parity overhead), mirrors, and simple pools uniformly,
			// and Used = Total - Free captures all pool consumers including
			// zvols. (issues #1052, mirror-vdev fix)
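			//
			// Worked example (hypothetical numbers): a 4-disk RAIDZ1 pool with
			// stat.Size = 40T raw and stat.Free = 20T raw, whose root dataset
			// reports ds.Free = 15T usable, yields totalBytes = 40T * (15T/20T)
			// = 30T, matching the expected (N-P)/N = 3/4 of raw capacity.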
			totalBytes := stat.Size
			freeBytes := stat.Free
			if ds.Free > 0 && stat.Free > 0 && stat.Free >= ds.Free {
				// Convert raw pool total to usable capacity using the
				// raw-to-usable ratio derived from free space.
				// For mirrors the ratio is ~1 (no overhead).
				// For RAIDZ the ratio is (N-P)/N (parity overhead).
				totalBytes = uint64(float64(stat.Size) * (float64(ds.Free) / float64(stat.Free)))
				freeBytes = ds.Free
				log.Debug().Str("pool", pool).Uint64("usableTotal", totalBytes).Uint64("usableFree", freeBytes).Uint64("zpoolSize", stat.Size).Uint64("zpoolFree", stat.Free).Uint64("dsFree", ds.Free).Msg("zfs: computed usable capacity from free-space ratio")
			} else {
				log.Debug().Str("pool", pool).Uint64("zpoolSize", stat.Size).Uint64("zpoolFree", stat.Free).Uint64("dsFree", ds.Free).Msg("zfs: using raw zpool stats (no usable dataset free)")
			}
			// Subtract only when safe: freeBytes can exceed totalBytes after
			// the float conversion above, and unsigned subtraction would wrap.
			var usedBytes uint64
			if totalBytes >= freeBytes {
				usedBytes = totalBytes - freeBytes
			}

			log.Debug().Str("pool", pool).Int64("totalBytes", int64(totalBytes)).Int64("usedBytes", int64(usedBytes)).Int64("freeBytes", int64(freeBytes)).Float64("usage", clampPercent(calculatePercent(totalBytes, usedBytes))).Msg("zfs: emitting disk entry")
			disks = append(disks, newZFSDisk(pool, mp, totalBytes, usedBytes, freeBytes))
			continue
		}

		if ds.Total > 0 {
			log.Debug().Str("pool", pool).Int64("totalBytes", int64(ds.Total)).Int64("usedBytes", int64(ds.Used)).Float64("usage", clampPercent(calculatePercent(ds.Total, ds.Used))).Msg("zfs: emitting disk entry from dataset only (no zpool stats)")
			disks = append(disks, newZFSDisk(pool, mp, ds.Total, ds.Used, ds.Free))
		} else {
			log.Debug().Str("pool", pool).Msg("zfs: skipping pool with no zpool stats and zero dataset total")
		}
	}

	return disks
}

// fallbackZFSDisks builds Disk entries from dataset statfs data alone, for
// hosts where zpool stats could not be collected.
func fallbackZFSDisks(bestDatasets map[string]zfsDatasetUsage, mountpoints map[string]string) []agentshost.Disk {
	log.Debug().Int("poolCount", len(bestDatasets)).Msg("zfs: fallback disk generation")
	if len(bestDatasets) == 0 {
		return nil
	}

	pools := make([]string, 0, len(bestDatasets))
	for pool := range bestDatasets {
		pools = append(pools, pool)
	}
	sort.Strings(pools)

	disks := make([]agentshost.Disk, 0, len(pools))
	for _, pool := range pools {
		ds := bestDatasets[pool]
		if ds.Total == 0 {
			continue
		}

		mp := mountpoints[pool]
		if mp == "" {
			mp = fmt.Sprintf("zpool:%s", pool)
		}

		disks = append(disks, newZFSDisk(pool, mp, ds.Total, ds.Used, ds.Free))
	}

	return disks
}

// commonZpoolPaths lists common locations for the zpool binary.
// TrueNAS SCALE, FreeBSD, and various Linux distributions may install
// zpool in locations that are not on the agent's PATH.
// This helps fix issue #718 where TrueNAS reports inflated storage.
var commonZpoolPaths = []string{
	"/usr/sbin/zpool",       // TrueNAS SCALE, Debian, Ubuntu
	"/sbin/zpool",           // FreeBSD, older Linux
	"/usr/local/sbin/zpool", // FreeBSD ports, custom builds
	"/usr/local/bin/zpool",  // Custom installations
	"/opt/zfs/bin/zpool",    // Some enterprise Linux
	"/usr/bin/zpool",        // Some distributions
}

// findZpool returns the path to the zpool binary, preferring known absolute
// locations and falling back to a PATH lookup.
func findZpool() (string, error) {
	// Prefer common absolute paths so execution does not depend on PATH order.
	// This matters on TrueNAS SCALE, where the agent may run with a restricted
	// PATH that does not include /usr/sbin.
	for _, path := range commonZpoolPaths {
		if _, err := zpoolStat(path); err == nil {
			log.Debug().Str("path", path).Msg("zfs: found zpool at hardcoded path")
			return path, nil
		}
	}

	// Fall back to PATH lookup for non-standard installations.
	path, err := zpoolLookPath("zpool")
	if err != nil {
		log.Debug().Msg("zfs: zpool binary not found in PATH or common locations")
		return "", fmt.Errorf("zpool binary not found in PATH or common locations")
	}
	path = filepath.Clean(path)
	if !filepath.IsAbs(path) {
		return "", fmt.Errorf("zpool path is not absolute: %q", path)
	}
	if _, err := zpoolStat(path); err != nil {
		return "", fmt.Errorf("zpool path unavailable: %w", err)
	}

	log.Debug().Str("path", path).Msg("zfs: found zpool via PATH")
	return path, nil
}
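
// fetchZpoolStats runs `zpool list -Hp -o name,size,allocated,free <pools...>`
// (for example, /usr/sbin/zpool list -Hp -o name,size,allocated,free tank) and
// returns per-pool capacity in bytes; -H suppresses the header row and -p
// prints exact, unscaled numbers, which keeps parsing simple.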
func fetchZpoolStats(ctx context.Context, pools []string) (map[string]zpoolStats, error) {
	if len(pools) == 0 {
		return nil, nil
	}
	pools = filterValidZFSPoolNames(pools)
	if len(pools) == 0 {
		return nil, fmt.Errorf("no valid zfs pool names to query")
	}

	path, err := findZpool()
	if err != nil {
		return nil, fmt.Errorf("zfs: locate zpool binary: %w", err)
	}

	args := []string{"list", "-Hp", "-o", "name,size,allocated,free"}
	args = append(args, pools...)

	log.Debug().Str("path", path).Strs("args", args).Msg("zfs: executing zpool list")
	output, stderr, err := zpoolCommandRunner(ctx, path, args...)
	if err != nil {
		if errors.Is(err, errZpoolCommandOutputTooLarge) {
			return nil, fmt.Errorf("zpool list output exceeded %d bytes", maxZpoolCommandOutputSize)
		}
		log.Debug().Err(err).Str("path", path).Strs("args", args).Int("stderrBytes", len(stderr)).Msg("zfs: zpool list failed")
		return nil, err
	}
	log.Debug().Int("outputBytes", len(output)).Msg("zfs: zpool list succeeded")

	stats, err := parseZpoolList(output)
	if err != nil {
		return nil, fmt.Errorf("zfs: parse zpool list output: %w", err)
	}
	return stats, nil
}

// runZpoolCommand executes the zpool binary, capturing stdout and stderr with
// a hard size cap so a misbehaving command cannot exhaust memory.
func runZpoolCommand(ctx context.Context, name string, args ...string) ([]byte, []byte, error) {
	cmd := exec.CommandContext(ctx, name, args...)
	stdout := limitedBuffer{maxBytes: maxZpoolCommandOutputSize}
	stderr := limitedBuffer{maxBytes: maxZpoolCommandOutputSize}
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	err := cmd.Run()
	if stdout.exceeded || stderr.exceeded {
		return stdout.Bytes(), stderr.Bytes(), errZpoolCommandOutputTooLarge
	}
	return stdout.Bytes(), stderr.Bytes(), err
}
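
// parseZpoolList parses the tab-separated output of
// `zpool list -Hp -o name,size,allocated,free`, which looks like
// (illustrative values):
//
//	tank	3985729650688	1231453814784	2754275835904
//	rpool	255550554112	98784247808	156766306304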
func parseZpoolList(output []byte) (map[string]zpoolStats, error) {
	stats := make(map[string]zpoolStats)
	scanner := bufio.NewScanner(bytes.NewReader(output))
	var firstParseErr error
	invalidLines := 0
	lineNumber := 0
	for scanner.Scan() {
		lineNumber++
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		fields := strings.Split(line, "\t")
		if len(fields) < 4 {
			invalidLines++
			if firstParseErr == nil {
				firstParseErr = fmt.Errorf("line %d: expected at least 4 tab-separated fields", lineNumber)
			}
			continue
		}
		pool, ok := normalizeZFSPoolName(fields[0])
		if !ok {
			continue
		}

		size, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			invalidLines++
			if firstParseErr == nil {
				firstParseErr = fmt.Errorf("line %d: parse size %q: %w", lineNumber, fields[1], err)
			}
			continue
		}
		alloc, err := strconv.ParseUint(fields[2], 10, 64)
		if err != nil {
			invalidLines++
			if firstParseErr == nil {
				firstParseErr = fmt.Errorf("line %d: parse allocated %q: %w", lineNumber, fields[2], err)
			}
			continue
		}
		free, err := strconv.ParseUint(fields[3], 10, 64)
		if err != nil {
			invalidLines++
			if firstParseErr == nil {
				firstParseErr = fmt.Errorf("line %d: parse free %q: %w", lineNumber, fields[3], err)
			}
			continue
		}

		stats[pool] = zpoolStats{
			Size:  size,
			Alloc: alloc,
			Free:  free,
		}
	}

	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("scan zpool list output: %w", err)
	}
	if len(stats) == 0 {
		if firstParseErr != nil {
			return nil, fmt.Errorf("zpool list returned no usable data (%d invalid lines): %w", invalidLines, firstParseErr)
		}
		return nil, errors.New("zpool list returned no usable data")
	}
	if invalidLines > 0 {
		log.Debug().
			Int("invalidLines", invalidLines).
			Int("validPools", len(stats)).
			Msg("zfs: skipped malformed zpool list rows")
	}
	return stats, nil
}

// uniqueZFSPools returns the sorted set of valid pool names referenced by the
// datasets.
func uniqueZFSPools(datasets []zfsDatasetUsage) []string {
	set := make(map[string]struct{}, len(datasets))
	for _, ds := range datasets {
		pool, ok := normalizeZFSPoolName(ds.Pool)
		if !ok {
			continue
		}
		set[pool] = struct{}{}
	}
	if len(set) == 0 {
		return nil
	}

	pools := make([]string, 0, len(set))
	for pool := range set {
		pools = append(pools, pool)
	}
	sort.Strings(pools)
	return pools
}

// filterValidZFSPoolNames normalizes and de-duplicates pool names, preserving
// the order in which they first appear.
func filterValidZFSPoolNames(pools []string) []string {
	filtered := make([]string, 0, len(pools))
	seen := make(map[string]struct{}, len(pools))
	for _, pool := range pools {
		normalized, ok := normalizeZFSPoolName(pool)
		if !ok {
			continue
		}
		if _, exists := seen[normalized]; exists {
			continue
		}
		seen[normalized] = struct{}{}
		filtered = append(filtered, normalized)
	}
	return filtered
}
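
// normalizeZFSPoolName trims the name and accepts only ASCII letters, digits,
// and the characters '_', '-', '.', ':', rejecting empty, over-long (>255
// bytes), or '-'-prefixed names that could otherwise be parsed as flags. For
// example, "tank" and "backup-pool.2" pass, while "-o" and "my pool" do not.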
func normalizeZFSPoolName(pool string) (string, bool) {
	pool = strings.TrimSpace(pool)
	if pool == "" || len(pool) > 255 {
		return "", false
	}
	if strings.HasPrefix(pool, "-") {
		return "", false
	}

	for _, r := range pool {
		switch {
		case r >= 'a' && r <= 'z':
		case r >= 'A' && r <= 'Z':
		case r >= '0' && r <= '9':
		case r == '_' || r == '-' || r == '.' || r == ':':
		default:
			return "", false
		}
	}

	return pool, true
}

// bestZFSMountpoints picks a representative mountpoint per pool, favoring the
// dataset with the lowest zfsMountpointScore.
func bestZFSMountpoints(datasets []zfsDatasetUsage) map[string]string {
	mounts := make(map[string]string, len(datasets))
	scores := make(map[string]int, len(datasets))

	for _, ds := range datasets {
		if ds.Pool == "" || ds.Mountpoint == "" {
			continue
		}

		score := zfsMountpointScore(ds)
		if current, ok := scores[ds.Pool]; ok && score >= current {
			continue
		}
		scores[ds.Pool] = score
		mounts[ds.Pool] = ds.Mountpoint
	}

	return mounts
}
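
// zfsMountpointScore ranks a dataset's mountpoint as a candidate for its pool;
// lower is better. A pool's root dataset (no '/' in its name) scores 0 and
// always wins; deeper mountpoints score progressively higher. For example,
// dataset "tank" at "/tank" scores 0, while "tank/data/sub" mounted at
// "/tank/data/sub" scores 3.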
func zfsMountpointScore(ds zfsDatasetUsage) int {
	if ds.Dataset != "" && !strings.Contains(ds.Dataset, "/") {
		return 0
	}
	path := strings.Trim(ds.Mountpoint, "/")
	if path == "" {
		return 1
	}
	return 1 + strings.Count(path, "/")
}

// zfsPoolFromDevice extracts the pool name from a dataset path such as
// "tank/data", returning "tank".
func zfsPoolFromDevice(device string) string {
	device = strings.TrimSpace(device)
	if device == "" {
		return ""
	}
	if idx := strings.Index(device, "/"); idx >= 0 {
		return device[:idx]
	}
	return device
}

func calculatePercent(total, used uint64) float64 {
	if total == 0 {
		return 0
	}
	return (float64(used) / float64(total)) * 100
}

func clampPercent(value float64) float64 {
	switch {
	case value < 0:
		return 0
	case value > 100:
		return 100
	default:
		return value
	}
}

// bestZFSPoolDatasets picks, for each pool, the dataset reporting the largest
// Total, which serves as the pool's representative statfs sample.
func bestZFSPoolDatasets(datasets []zfsDatasetUsage) map[string]zfsDatasetUsage {
	best := make(map[string]zfsDatasetUsage)
	for _, ds := range datasets {
		if ds.Pool == "" {
			continue
		}
		if current, ok := best[ds.Pool]; !ok || ds.Total > current.Total {
			best[ds.Pool] = ds
		}
	}
	return best
}