mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 11:30:15 +00:00
1194 lines
32 KiB
Go
1194 lines
32 KiB
Go
// Package smartctl provides S.M.A.R.T. data collection from local disks.
|
|
package smartctl
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"runtime"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// Indirection points for process execution, filesystem access, the clock and
// the platform name. The rest of the package goes through these variables
// instead of calling the standard library directly (presumably so tests can
// substitute fakes — confirm against the test files).
var (
	execLookPath = exec.LookPath
	readDir      = os.ReadDir
	readFile     = os.ReadFile
	evalSymlinks = filepath.EvalSymlinks

	// runCommandOutput runs name with args under ctx and returns its standard output.
	runCommandOutput = func(ctx context.Context, name string, args ...string) ([]byte, error) {
		cmd := exec.CommandContext(ctx, name, args...)
		return cmd.Output()
	}

	timeNow     = time.Now
	runtimeGOOS = runtime.GOOS

	// errSMARTDataUnavailable is returned by the parsers when smartctl output
	// carries neither health, temperature, attributes nor a standby indication.
	errSMARTDataUnavailable = errors.New("smart data unavailable for device")
)

// smartctlStandbyExitStatus is the exit status smartctl is asked to use (via
// "-n standby,<status>") when the disk is in standby and was not queried.
const smartctlStandbyExitStatus = 3
|
|
|
|
// DiskSMART represents S.M.A.R.T. data for a single disk.
|
|
type DiskSMART struct {
|
|
Device string `json:"device"` // Device path (e.g., /dev/sda)
|
|
Model string `json:"model,omitempty"` // Disk model
|
|
Serial string `json:"serial,omitempty"` // Serial number
|
|
WWN string `json:"wwn,omitempty"` // World Wide Name
|
|
Type string `json:"type,omitempty"` // Transport type: sata, sas, nvme
|
|
Temperature int `json:"temperature"` // Temperature in Celsius
|
|
Health string `json:"health,omitempty"` // PASSED, FAILED, UNKNOWN
|
|
Standby bool `json:"standby,omitempty"` // True if disk was in standby
|
|
Attributes *SMARTAttributes `json:"attributes,omitempty"`
|
|
LastUpdated time.Time `json:"lastUpdated"` // When this reading was taken
|
|
}
|
|
|
|
// SMARTAttributes is the normalized attribute set shared by SATA and NVMe
// disks. Every field is a pointer so that a genuine zero can be told apart
// from an attribute that was not reported.
type SMARTAttributes struct {
	// Attributes common to both transports.
	PowerOnHours *int64 `json:"powerOnHours,omitempty"`
	PowerCycles  *int64 `json:"powerCycles,omitempty"`

	// SATA-only attributes, keyed by ATA attribute ID.
	ReallocatedSectors   *int64 `json:"reallocatedSectors,omitempty"`   // ID 5
	PendingSectors       *int64 `json:"pendingSectors,omitempty"`       // ID 197
	OfflineUncorrectable *int64 `json:"offlineUncorrectable,omitempty"` // ID 198
	UDMACRCErrors        *int64 `json:"udmaCrcErrors,omitempty"`        // ID 199

	// NVMe-only attributes, taken from the NVMe health information log.
	PercentageUsed  *int   `json:"percentageUsed,omitempty"`
	AvailableSpare  *int   `json:"availableSpare,omitempty"`
	MediaErrors     *int64 `json:"mediaErrors,omitempty"`
	UnsafeShutdowns *int64 `json:"unsafeShutdowns,omitempty"`
}
|
|
|
|
// smartctlJSON mirrors the subset of smartctl's JSON document that this
// package reads.
type smartctlJSON struct {
	// Smartctl.Output carries the plain-text lines smartctl embeds in the
	// JSON document; they are re-parsed as a textual fallback.
	Smartctl struct {
		Output []string `json:"output"`
	} `json:"smartctl"`

	Device struct {
		Name     string `json:"name"`
		Type     string `json:"type"`
		Protocol string `json:"protocol"`
	} `json:"device"`

	ModelFamily  string `json:"model_family"`
	ModelName    string `json:"model_name"`
	SerialNumber string `json:"serial_number"`

	WWN struct {
		NAA uint64 `json:"naa"`
		OUI uint64 `json:"oui"`
		ID  uint64 `json:"id"`
	} `json:"wwn"`

	// SmartStatus is a pointer so that a missing health verdict can be
	// distinguished from an explicit "passed": false.
	SmartStatus *struct {
		Passed bool `json:"passed"`
	} `json:"smart_status"`

	Temperature struct {
		Current int `json:"current"`
	} `json:"temperature"`

	// ATA SMART attribute table.
	ATASmartAttributes struct {
		Table []struct {
			ID     int    `json:"id"`
			Name   string `json:"name"`
			Value  int    `json:"value"`
			Worst  int    `json:"worst"`
			Thresh int    `json:"thresh"`
			Raw    struct {
				Value  int64  `json:"value"`
				String string `json:"string"`
			} `json:"raw"`
		} `json:"table"`
	} `json:"ata_smart_attributes"`

	// ATA SCT status block; its current value is used as a temperature source.
	ATASCTStatus struct {
		Current struct {
			Value int `json:"value"`
		} `json:"current"`
	} `json:"ata_sct_status"`

	// NVMe health information log.
	NVMeSmartHealthInformationLog struct {
		Temperature     int   `json:"temperature"`
		AvailableSpare  int   `json:"available_spare"`
		PercentageUsed  int   `json:"percentage_used"`
		PowerOnHours    int64 `json:"power_on_hours"`
		UnsafeShutdowns int64 `json:"unsafe_shutdowns"`
		MediaErrors     int64 `json:"media_errors"`
		PowerCycles     int64 `json:"power_cycles"`
	} `json:"nvme_smart_health_information_log"`

	PowerMode string `json:"power_mode"`
}
|
|
|
|
// smartTextFallback holds the values recovered from smartctl's plain-text
// output. It is used when JSON parsing fails and to fill gaps left by the
// JSON document.
type smartTextFallback struct {
	Model       string // disk model / product string
	Serial      string // serial number
	Type        string // transport: "nvme", "sas" or "sata"; empty when unknown
	Health      string // "PASSED", "FAILED" or empty
	Temperature int    // degrees Celsius; 0 when no temperature was found
	Standby     bool   // true when the text reports the device is in standby mode
}
|
|
|
|
// Patterns used to pull a temperature out of smartctl's plain-text output.
var (
	// smartTextTempAttributeRE matches an attribute-table row for ID 190 or
	// 194 and captures the ID and the number that follows the "-" column.
	smartTextTempAttributeRE = regexp.MustCompile(`^\s*(190|194)\s+\S+.*-\s+(\d{1,3})\b`)

	// smartTextCurrentTempRE matches a line beginning with "Current Temperature:"
	// (case-insensitive) and captures the number after it.
	smartTextCurrentTempRE = regexp.MustCompile(`(?i)^current temperature:\s*(\d{1,3})\b`)

	// smartTextTemperatureRE matches a line beginning with "Temperature:"
	// (case-insensitive) and captures the number after it.
	smartTextTemperatureRE = regexp.MustCompile(`(?i)^temperature:\s*(\d{1,3})\b`)
)
|
|
|
|
// lsblkJSON is the top-level document printed by "lsblk -J".
type lsblkJSON struct {
	Blockdevices []lsblkDevice `json:"blockdevices"`
}

// lsblkDevice is one entry of lsblk's "blockdevices" array, limited to the
// columns this package requests (NAME,TYPE,TRAN,MODEL,VENDOR,SUBSYSTEMS).
type lsblkDevice struct {
	Name       string `json:"name"`       // kernel device name, e.g. "sda"
	Type       string `json:"type"`       // lsblk device type; "disk" for whole disks
	Tran       string `json:"tran"`       // transport reported by lsblk
	Model      string `json:"model"`      // model string
	Vendor     string `json:"vendor"`     // vendor string
	Subsystems string `json:"subsystems"` // subsystem chain reported by lsblk
}
|
|
|
|
// smartctlTarget identifies one thing to probe with smartctl: a device path
// and, optionally, the value to pass to smartctl's "-d" option.
type smartctlTarget struct {
	Path       string
	DeviceType string
}

// displayName returns a short label for the target: the base name of the
// trimmed path, with " [DeviceType]" appended when a device type is set.
// If the base name is degenerate (empty, "." or the path separator) the
// trimmed path itself is used instead.
func (t smartctlTarget) displayName() string {
	trimmed := strings.TrimSpace(t.Path)
	label := filepath.Base(trimmed)
	switch label {
	case "", ".", string(filepath.Separator):
		label = trimmed
	}
	if t.DeviceType != "" {
		label += " [" + t.DeviceType + "]"
	}
	return label
}
|
|
|
|
// Lookup lists used on Linux to recognise block devices that are virtual or
// logical. All entries are lowercase; callers lowercase the value under test
// before comparing.
var (
	// linuxSMARTVirtualPrefixes are device-name prefixes of virtual/logical devices.
	linuxSMARTVirtualPrefixes = []string{
		"dm-",
		"drbd",
		"loop",
		"md",
		"nbd",
		"pmem",
		"ram",
		"rbd",
		"vd",
		"xvd",
		"zd",
		"zram",
	}

	// linuxSMARTVirtualMetadataTokens are substrings looked for in the
	// combined "vendor model" string of a device.
	linuxSMARTVirtualMetadataTokens = []string{
		"hyper-v",
		"msft virtual",
		"parallels",
		"qemu",
		"vbox",
		"virtual disk",
		"virtual hd",
		"virtualbox",
		"vmware",
	}

	// linuxSMARTVirtualSubsystemTokens are substrings looked for in a
	// device's subsystem path or subsystem list.
	linuxSMARTVirtualSubsystemTokens = []string{
		"drbd",
		"nbd",
		"vmbus",
		"virtio",
		"xen",
		"zfs",
	}
)
|
|
|
|
// CollectLocal collects S.M.A.R.T. data from all local block devices.
|
|
// The diskExclude parameter specifies patterns for devices to skip (e.g., "sda", "/dev/nvme*", "*cache*").
|
|
func CollectLocal(ctx context.Context, diskExclude []string) ([]DiskSMART, error) {
|
|
targets, err := listSMARTTargets(ctx, diskExclude)
|
|
if err != nil {
|
|
log.Debug().Err(err).Msg("Failed to list block devices for SMART collection")
|
|
return nil, err
|
|
}
|
|
|
|
if len(targets) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
var results []DiskSMART
|
|
for _, target := range targets {
|
|
smart, err := collectSMARTTarget(ctx, target)
|
|
if err != nil {
|
|
log.Debug().Err(err).Str("device", target.displayName()).Msg("Failed to collect SMART data for device")
|
|
continue
|
|
}
|
|
if smart != nil {
|
|
results = append(results, *smart)
|
|
}
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
func listSMARTTargets(ctx context.Context, diskExclude []string) ([]smartctlTarget, error) {
|
|
if runtimeGOOS == "linux" {
|
|
return listSMARTTargetsLinux(ctx, diskExclude)
|
|
}
|
|
|
|
devices, err := listBlockDevices(ctx, diskExclude)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return smartctlTargetsFromDevices(devices), nil
|
|
}
|
|
|
|
func smartctlTargetsFromDevices(devices []string) []smartctlTarget {
|
|
if len(devices) == 0 {
|
|
return nil
|
|
}
|
|
targets := make([]smartctlTarget, 0, len(devices))
|
|
for _, device := range devices {
|
|
targets = append(targets, smartctlTarget{Path: device})
|
|
}
|
|
return targets
|
|
}
|
|
|
|
func listSMARTTargetsLinux(ctx context.Context, diskExclude []string) ([]smartctlTarget, error) {
|
|
targets, err := listSMARTTargetsLinuxFromScanOpen(ctx, diskExclude)
|
|
if err == nil && len(targets) > 0 {
|
|
return targets, nil
|
|
}
|
|
if err != nil {
|
|
log.Debug().Err(err).Msg("Failed to enumerate Linux SMART targets via smartctl --scan-open, falling back to block device discovery")
|
|
}
|
|
|
|
devices, fallbackErr := listBlockDevicesLinux(ctx, diskExclude)
|
|
if fallbackErr != nil {
|
|
return nil, fallbackErr
|
|
}
|
|
return smartctlTargetsFromDevices(devices), nil
|
|
}
|
|
|
|
func listSMARTTargetsLinuxFromScanOpen(ctx context.Context, diskExclude []string) ([]smartctlTarget, error) {
|
|
smartctlPath, err := execLookPath("smartctl")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
output, err := runCommandOutput(ctx, smartctlPath, "--scan-open")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return parseSmartctlScanOpenTargets(output, diskExclude), nil
|
|
}
|
|
|
|
func parseSmartctlScanOpenTargets(output []byte, diskExclude []string) []smartctlTarget {
|
|
lines := strings.Split(string(output), "\n")
|
|
targets := make([]smartctlTarget, 0, len(lines))
|
|
typedByPath := make(map[string]bool)
|
|
seen := make(map[string]struct{})
|
|
|
|
for _, rawLine := range lines {
|
|
line := strings.TrimSpace(rawLine)
|
|
if line == "" {
|
|
continue
|
|
}
|
|
if idx := strings.Index(line, "#"); idx >= 0 {
|
|
line = strings.TrimSpace(line[:idx])
|
|
}
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
fields := strings.Fields(line)
|
|
if len(fields) == 0 {
|
|
continue
|
|
}
|
|
|
|
path := strings.TrimSpace(fields[0])
|
|
if path == "" || (!strings.HasPrefix(path, "/") && !strings.HasPrefix(path, "-")) {
|
|
continue
|
|
}
|
|
|
|
deviceType := ""
|
|
for i := 1; i < len(fields)-1; i++ {
|
|
if fields[i] == "-d" {
|
|
deviceType = strings.TrimSpace(fields[i+1])
|
|
break
|
|
}
|
|
}
|
|
|
|
name := filepath.Base(path)
|
|
if matchesDeviceExclude(name, path, diskExclude) {
|
|
continue
|
|
}
|
|
|
|
key := path + "\x00" + deviceType
|
|
if _, ok := seen[key]; ok {
|
|
continue
|
|
}
|
|
seen[key] = struct{}{}
|
|
if deviceType != "" {
|
|
typedByPath[path] = true
|
|
}
|
|
|
|
targets = append(targets, smartctlTarget{
|
|
Path: path,
|
|
DeviceType: deviceType,
|
|
})
|
|
}
|
|
|
|
if len(targets) == 0 {
|
|
return nil
|
|
}
|
|
|
|
filtered := make([]smartctlTarget, 0, len(targets))
|
|
for _, target := range targets {
|
|
if target.DeviceType == "" && typedByPath[target.Path] {
|
|
continue
|
|
}
|
|
filtered = append(filtered, target)
|
|
}
|
|
return filtered
|
|
}
|
|
|
|
// listBlockDevices returns a list of block devices suitable for SMART queries.
|
|
// Devices matching any of the diskExclude patterns are skipped.
|
|
func listBlockDevices(ctx context.Context, diskExclude []string) ([]string, error) {
|
|
if runtimeGOOS == "freebsd" {
|
|
return listBlockDevicesFreeBSD(ctx, diskExclude)
|
|
}
|
|
return listBlockDevicesLinux(ctx, diskExclude)
|
|
}
|
|
|
|
// listBlockDevicesLinux uses lsblk to find disks on Linux.
|
|
func listBlockDevicesLinux(ctx context.Context, diskExclude []string) ([]string, error) {
|
|
devices, err := listBlockDevicesLinuxFromSysfs(diskExclude)
|
|
if err == nil {
|
|
return devices, nil
|
|
}
|
|
if err != nil {
|
|
log.Debug().Err(err).Msg("Failed to enumerate Linux disks from /sys/block, falling back to lsblk")
|
|
}
|
|
|
|
return listBlockDevicesLinuxFromLSBLK(ctx, diskExclude)
|
|
}
|
|
|
|
func listBlockDevicesLinuxFromSysfs(diskExclude []string) ([]string, error) {
|
|
entries, err := readDir("/sys/block")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var devices []string
|
|
for _, entry := range entries {
|
|
name := strings.TrimSpace(entry.Name())
|
|
if name == "" {
|
|
continue
|
|
}
|
|
|
|
devicePath := "/dev/" + name
|
|
if reason := linuxSMARTSkipReasonSysfs(name); reason != "" {
|
|
log.Debug().
|
|
Str("device", devicePath).
|
|
Str("reason", reason).
|
|
Msg("Skipping non-physical device for SMART collection")
|
|
continue
|
|
}
|
|
if matchesDeviceExclude(name, devicePath, diskExclude) {
|
|
log.Debug().Str("device", devicePath).Msg("Skipping excluded device for SMART collection")
|
|
continue
|
|
}
|
|
devices = append(devices, devicePath)
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func linuxSMARTSkipReasonSysfs(name string) string {
|
|
for _, prefix := range linuxSMARTVirtualPrefixes {
|
|
if strings.HasPrefix(strings.ToLower(name), prefix) {
|
|
return "virtual/logical device prefix"
|
|
}
|
|
}
|
|
|
|
blockPath := filepath.Join("/sys/block", name)
|
|
if resolved, err := evalSymlinks(blockPath); err == nil && strings.Contains(strings.ToLower(resolved), "/virtual/") {
|
|
return "virtual block device"
|
|
}
|
|
|
|
subsystemPath := filepath.Join(blockPath, "device", "subsystem")
|
|
if resolved, err := evalSymlinks(subsystemPath); err == nil {
|
|
lowerResolved := strings.ToLower(resolved)
|
|
for _, token := range linuxSMARTVirtualSubsystemTokens {
|
|
if strings.Contains(lowerResolved, token) {
|
|
return "virtual/logical subsystem"
|
|
}
|
|
}
|
|
}
|
|
|
|
metadata := strings.ToLower(strings.TrimSpace(
|
|
readTrimmedFile(filepath.Join(blockPath, "device", "vendor")) + " " +
|
|
readTrimmedFile(filepath.Join(blockPath, "device", "model")),
|
|
))
|
|
for _, token := range linuxSMARTVirtualMetadataTokens {
|
|
if strings.Contains(metadata, token) {
|
|
return "virtual disk model/vendor signature"
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func readTrimmedFile(path string) string {
|
|
data, err := readFile(path)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(string(data))
|
|
}
|
|
|
|
func listBlockDevicesLinuxFromLSBLK(ctx context.Context, diskExclude []string) ([]string, error) {
|
|
output, err := runCommandOutput(ctx, "lsblk", "-J", "-d", "-o", "NAME,TYPE,TRAN,MODEL,VENDOR,SUBSYSTEMS")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var data lsblkJSON
|
|
if err := json.Unmarshal(output, &data); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var devices []string
|
|
for _, disk := range data.Blockdevices {
|
|
if strings.TrimSpace(disk.Name) == "" {
|
|
continue
|
|
}
|
|
|
|
devicePath := "/dev/" + disk.Name
|
|
if reason := linuxSMARTSkipReason(disk); reason != "" {
|
|
log.Debug().
|
|
Str("device", devicePath).
|
|
Str("reason", reason).
|
|
Msg("Skipping non-physical device for SMART collection")
|
|
continue
|
|
}
|
|
if matchesDeviceExclude(disk.Name, devicePath, diskExclude) {
|
|
log.Debug().Str("device", devicePath).Msg("Skipping excluded device for SMART collection")
|
|
continue
|
|
}
|
|
devices = append(devices, devicePath)
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func linuxSMARTSkipReason(device lsblkDevice) string {
|
|
if !strings.EqualFold(strings.TrimSpace(device.Type), "disk") {
|
|
return "not a whole disk"
|
|
}
|
|
|
|
name := strings.ToLower(strings.TrimSpace(device.Name))
|
|
for _, prefix := range linuxSMARTVirtualPrefixes {
|
|
if strings.HasPrefix(name, prefix) {
|
|
return "virtual/logical device prefix"
|
|
}
|
|
}
|
|
|
|
transport := strings.ToLower(strings.TrimSpace(device.Tran))
|
|
if transport == "virtio" {
|
|
return "virtio transport"
|
|
}
|
|
|
|
subsystems := strings.ToLower(strings.TrimSpace(device.Subsystems))
|
|
for _, token := range linuxSMARTVirtualSubsystemTokens {
|
|
if strings.Contains(subsystems, token) {
|
|
return "virtual/logical subsystem"
|
|
}
|
|
}
|
|
|
|
metadata := strings.ToLower(strings.TrimSpace(device.Vendor + " " + device.Model))
|
|
for _, token := range linuxSMARTVirtualMetadataTokens {
|
|
if strings.Contains(metadata, token) {
|
|
return "virtual disk model/vendor signature"
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
// listBlockDevicesFreeBSD uses sysctl kern.disks to find disks on FreeBSD.
|
|
func listBlockDevicesFreeBSD(ctx context.Context, diskExclude []string) ([]string, error) {
|
|
names, sysctlErr := freeBSDDiskNamesFromSysctl(ctx)
|
|
if sysctlErr != nil {
|
|
log.Debug().Err(sysctlErr).Msg("Failed to enumerate FreeBSD disks from kern.disks")
|
|
}
|
|
|
|
fallbackNames, fallbackErr := freeBSDDiskNamesFromDev()
|
|
if fallbackErr != nil {
|
|
log.Debug().Err(fallbackErr).Msg("Failed to enumerate FreeBSD disks from /dev")
|
|
}
|
|
|
|
if len(names) == 0 {
|
|
names = fallbackNames
|
|
} else if len(fallbackNames) > 0 {
|
|
seen := make(map[string]struct{}, len(names))
|
|
for _, name := range names {
|
|
seen[name] = struct{}{}
|
|
}
|
|
for _, name := range fallbackNames {
|
|
if _, ok := seen[name]; ok {
|
|
continue
|
|
}
|
|
names = append(names, name)
|
|
}
|
|
}
|
|
|
|
if len(names) == 0 {
|
|
switch {
|
|
case sysctlErr != nil:
|
|
return nil, sysctlErr
|
|
case fallbackErr != nil:
|
|
return nil, fallbackErr
|
|
default:
|
|
return nil, nil
|
|
}
|
|
}
|
|
|
|
var devices []string
|
|
for _, name := range names {
|
|
devicePath := "/dev/" + name
|
|
if matchesDeviceExclude(name, devicePath, diskExclude) {
|
|
log.Debug().Str("device", devicePath).Msg("Skipping excluded device for SMART collection")
|
|
continue
|
|
}
|
|
devices = append(devices, devicePath)
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func freeBSDDiskNamesFromSysctl(ctx context.Context) ([]string, error) {
|
|
output, err := runCommandOutput(ctx, "sysctl", "-n", "kern.disks")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var devices []string
|
|
seen := make(map[string]struct{})
|
|
for _, name := range strings.Fields(strings.TrimSpace(string(output))) {
|
|
if name == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[name]; ok {
|
|
continue
|
|
}
|
|
seen[name] = struct{}{}
|
|
devices = append(devices, name)
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func freeBSDDiskNamesFromDev() ([]string, error) {
|
|
entries, err := readDir("/dev")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var names []string
|
|
for _, entry := range entries {
|
|
name := strings.TrimSpace(entry.Name())
|
|
if !isFreeBSDDiskDeviceName(name) {
|
|
continue
|
|
}
|
|
names = append(names, name)
|
|
}
|
|
sort.Strings(names)
|
|
|
|
return names, nil
|
|
}
|
|
|
|
func isFreeBSDDiskDeviceName(name string) bool {
|
|
for _, prefix := range []string{
|
|
"ad",
|
|
"ada",
|
|
"aacd",
|
|
"amrd",
|
|
"da",
|
|
"idad",
|
|
"ipsd",
|
|
"mfid",
|
|
"mfisyspd",
|
|
"mlxd",
|
|
"mmcsd",
|
|
"nda",
|
|
"nvd",
|
|
"nvme",
|
|
"twa",
|
|
"twed",
|
|
"tws",
|
|
"vtbd",
|
|
"xbd",
|
|
} {
|
|
if hasNumericSuffix(name, prefix) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// hasNumericSuffix reports whether name consists of prefix followed by one
// or more ASCII digits and nothing else.
func hasNumericSuffix(name, prefix string) bool {
	if !strings.HasPrefix(name, prefix) {
		return false
	}
	digits := name[len(prefix):]
	if digits == "" {
		return false
	}
	for i := 0; i < len(digits); i++ {
		if c := digits[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
|
|
|
|
// matchesDeviceExclude reports whether a device is hit by any exclusion
// pattern. Each pattern is trimmed (blank patterns are ignored) and tested
// against both the device name (e.g. "sda", "nvme0n1") and the full path
// (e.g. "/dev/sda"). Three pattern forms are recognised:
//   - "*text*" (more than two characters): matches when name or path contains text
//   - "text*": matches when name or path starts with text
//   - anything else: matches when it equals name or path exactly
func matchesDeviceExclude(name, devicePath string, excludePatterns []string) bool {
	for _, raw := range excludePatterns {
		pattern := strings.TrimSpace(raw)
		if pattern == "" {
			continue
		}

		leadingStar := strings.HasPrefix(pattern, "*")
		trailingStar := strings.HasSuffix(pattern, "*")

		switch {
		case leadingStar && trailingStar && len(pattern) > 2:
			needle := pattern[1 : len(pattern)-1]
			if strings.Contains(name, needle) || strings.Contains(devicePath, needle) {
				return true
			}
		case trailingStar:
			stem := pattern[:len(pattern)-1]
			if strings.HasPrefix(name, stem) || strings.HasPrefix(devicePath, stem) {
				return true
			}
		case pattern == name || pattern == devicePath:
			return true
		}
	}
	return false
}
|
|
|
|
// collectDeviceSMART runs smartctl on a single device and parses the result.
|
|
func collectDeviceSMART(ctx context.Context, device string) (*DiskSMART, error) {
|
|
return collectSMARTTarget(ctx, smartctlTarget{Path: device})
|
|
}
|
|
|
|
func collectSMARTTarget(ctx context.Context, target smartctlTarget) (*DiskSMART, error) {
|
|
// Use timeout to avoid hanging on slow/unresponsive disks
|
|
cmdCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
|
defer cancel()
|
|
|
|
// Check if smartctl is available
|
|
smartctlPath, err := execLookPath("smartctl")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
attempts := smartctlProbeAttempts(target)
|
|
var firstParsed *DiskSMART
|
|
var firstStandby *DiskSMART
|
|
var lastErr error
|
|
|
|
for i, args := range attempts {
|
|
output, err := runCommandOutput(cmdCtx, smartctlPath, args...)
|
|
|
|
// smartctl returns non-zero exit codes for various conditions.
|
|
if err != nil {
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
if exitErr.ExitCode() == smartctlStandbyExitStatus && len(output) == 0 {
|
|
standbyResult := &DiskSMART{
|
|
Device: target.displayName(),
|
|
Standby: true,
|
|
LastUpdated: timeNow(),
|
|
}
|
|
if runtimeGOOS == "freebsd" && i < len(attempts)-1 && target.DeviceType == "" {
|
|
if firstStandby == nil {
|
|
firstStandby = standbyResult
|
|
}
|
|
continue
|
|
}
|
|
return standbyResult, nil
|
|
}
|
|
if len(output) == 0 {
|
|
lastErr = err
|
|
continue
|
|
}
|
|
} else {
|
|
lastErr = err
|
|
continue
|
|
}
|
|
}
|
|
|
|
result, parseErr := parseSMARTOutput(output, target)
|
|
if parseErr != nil {
|
|
lastErr = parseErr
|
|
continue
|
|
}
|
|
result = enrichFreeBSDSCTTemperature(cmdCtx, smartctlPath, args, target, result)
|
|
if firstParsed == nil {
|
|
firstParsed = result
|
|
}
|
|
if !shouldRetryFreeBSDSMART(target.Path, result, i, len(attempts)) {
|
|
log.Debug().
|
|
Str("device", result.Device).
|
|
Str("model", result.Model).
|
|
Int("temperature", result.Temperature).
|
|
Str("health", result.Health).
|
|
Msg("Collected SMART data")
|
|
return result, nil
|
|
}
|
|
}
|
|
|
|
if firstParsed != nil {
|
|
log.Debug().
|
|
Str("device", firstParsed.Device).
|
|
Str("model", firstParsed.Model).
|
|
Int("temperature", firstParsed.Temperature).
|
|
Str("health", firstParsed.Health).
|
|
Msg("Collected SMART data")
|
|
return firstParsed, nil
|
|
}
|
|
if firstStandby != nil {
|
|
log.Debug().
|
|
Str("device", firstStandby.Device).
|
|
Msg("Collected SMART standby data")
|
|
return firstStandby, nil
|
|
}
|
|
if lastErr != nil {
|
|
if errors.Is(lastErr, errSMARTDataUnavailable) {
|
|
return nil, nil
|
|
}
|
|
return nil, lastErr
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
func smartctlProbeAttempts(target smartctlTarget) [][]string {
|
|
device := target.Path
|
|
if target.DeviceType != "" {
|
|
return [][]string{
|
|
smartctlArgs(device, target.DeviceType),
|
|
}
|
|
}
|
|
|
|
if runtimeGOOS == "freebsd" {
|
|
deviceTypes := freeBSDSmartctlDeviceTypes(filepath.Base(device))
|
|
if len(deviceTypes) > 0 {
|
|
attempts := make([][]string, 0, len(deviceTypes)+1)
|
|
for _, deviceType := range deviceTypes {
|
|
attempts = append(attempts, smartctlArgs(device, deviceType))
|
|
}
|
|
return append(attempts, smartctlArgs(device, ""))
|
|
}
|
|
}
|
|
|
|
return [][]string{
|
|
smartctlArgs(device, ""),
|
|
}
|
|
}
|
|
|
|
func smartctlArgs(device, deviceType string) []string {
|
|
args := []string{}
|
|
if deviceType != "" {
|
|
args = append(args, "-d", deviceType)
|
|
}
|
|
args = append(args, "-n", "standby,"+strconv.Itoa(smartctlStandbyExitStatus), "-i", "-A", "-H", "--json=o", device)
|
|
return args
|
|
}
|
|
|
|
// smartctlArgsWithLog returns a copy of args with "-l <logPage>" inserted
// immediately before the last element (the device path). A plain copy is
// returned when logPage is empty, args is empty, or args already contains
// the same "-l <logPage>" pair. The input slice is never modified.
func smartctlArgsWithLog(args []string, logPage string) []string {
	clone := func() []string { return append([]string(nil), args...) }

	if logPage == "" || len(args) == 0 {
		return clone()
	}
	for i := 1; i < len(args); i++ {
		if args[i-1] == "-l" && args[i] == logPage {
			return clone()
		}
	}

	last := len(args) - 1
	extended := make([]string, 0, len(args)+2)
	extended = append(extended, args[:last]...)
	extended = append(extended, "-l", logPage, args[last])
	return extended
}
|
|
|
|
func freeBSDSmartctlDeviceTypes(device string) []string {
|
|
if runtimeGOOS != "freebsd" {
|
|
return nil
|
|
}
|
|
|
|
switch {
|
|
case strings.HasPrefix(device, "ada"), strings.HasPrefix(device, "ad"):
|
|
return []string{"sat"}
|
|
case strings.HasPrefix(device, "da"):
|
|
return []string{"sat,auto", "scsi"}
|
|
case strings.HasPrefix(device, "nda"), strings.HasPrefix(device, "nvd"), strings.HasPrefix(device, "nvme"):
|
|
return []string{"nvme"}
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func shouldRetryFreeBSDSMART(device string, result *DiskSMART, attemptIndex, attemptCount int) bool {
|
|
if runtimeGOOS != "freebsd" || attemptIndex >= attemptCount-1 || result == nil {
|
|
return false
|
|
}
|
|
if result.Temperature > 0 {
|
|
return false
|
|
}
|
|
return len(freeBSDSmartctlDeviceTypes(filepath.Base(device))) > 0
|
|
}
|
|
|
|
func enrichFreeBSDSCTTemperature(ctx context.Context, smartctlPath string, args []string, target smartctlTarget, current *DiskSMART) *DiskSMART {
|
|
if runtimeGOOS != "freebsd" || current == nil || current.Standby || current.Temperature > 0 {
|
|
return current
|
|
}
|
|
if len(freeBSDSmartctlDeviceTypes(filepath.Base(target.Path))) == 0 {
|
|
return current
|
|
}
|
|
|
|
sctArgs := smartctlArgsWithLog(args, "scttempsts")
|
|
if len(sctArgs) == len(args) {
|
|
return current
|
|
}
|
|
|
|
output, err := runCommandOutput(ctx, smartctlPath, sctArgs...)
|
|
if err != nil {
|
|
var exitErr *exec.ExitError
|
|
if !errors.As(err, &exitErr) || len(output) == 0 {
|
|
return current
|
|
}
|
|
}
|
|
|
|
sctResult, parseErr := parseSMARTOutput(output, target)
|
|
if parseErr != nil || sctResult == nil || sctResult.Temperature <= 0 {
|
|
return current
|
|
}
|
|
return sctResult
|
|
}
|
|
|
|
func parseSMARTOutput(output []byte, target smartctlTarget) (*DiskSMART, error) {
|
|
var smartData smartctlJSON
|
|
if err := json.Unmarshal(output, &smartData); err != nil {
|
|
return parseSMARTTextOutput(string(output), target)
|
|
}
|
|
|
|
result := &DiskSMART{
|
|
Device: target.displayName(),
|
|
Model: smartData.ModelName,
|
|
Serial: smartData.SerialNumber,
|
|
Type: detectDiskType(smartData),
|
|
LastUpdated: timeNow(),
|
|
}
|
|
result.Standby = isStandbyPowerMode(smartData.PowerMode)
|
|
|
|
if smartData.WWN.NAA != 0 {
|
|
result.WWN = formatWWN(smartData.WWN.NAA, smartData.WWN.OUI, smartData.WWN.ID)
|
|
}
|
|
|
|
if smartData.Temperature.Current > 0 {
|
|
result.Temperature = smartData.Temperature.Current
|
|
} else if smartData.NVMeSmartHealthInformationLog.Temperature > 0 {
|
|
result.Temperature = smartData.NVMeSmartHealthInformationLog.Temperature
|
|
} else if smartData.ATASCTStatus.Current.Value > 0 {
|
|
result.Temperature = smartData.ATASCTStatus.Current.Value
|
|
} else {
|
|
for _, attr := range smartData.ATASmartAttributes.Table {
|
|
if attr.ID == 194 || attr.ID == 190 {
|
|
temp := parseRawValue(attr.Raw.String, attr.Raw.Value)
|
|
if temp > 0 && temp < 150 {
|
|
result.Temperature = int(temp)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if smartData.SmartStatus != nil {
|
|
if smartData.SmartStatus.Passed {
|
|
result.Health = "PASSED"
|
|
} else {
|
|
result.Health = "FAILED"
|
|
}
|
|
}
|
|
|
|
applySMARTTextFallback(result, parseSMARTTextFallback(strings.Join(smartData.Smartctl.Output, "\n")))
|
|
result.Attributes = parseSMARTAttributes(&smartData, result.Type)
|
|
if result.Health == "" && result.Temperature == 0 && result.Attributes == nil && !result.Standby {
|
|
return nil, errSMARTDataUnavailable
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func parseSMARTTextOutput(text string, target smartctlTarget) (*DiskSMART, error) {
|
|
fallback := parseSMARTTextFallback(text)
|
|
result := &DiskSMART{
|
|
Device: target.displayName(),
|
|
Model: fallback.Model,
|
|
Serial: fallback.Serial,
|
|
Type: fallback.Type,
|
|
Temperature: fallback.Temperature,
|
|
Health: fallback.Health,
|
|
Standby: fallback.Standby,
|
|
LastUpdated: timeNow(),
|
|
}
|
|
if result.Type == "" {
|
|
if target.DeviceType == "nvme" || strings.HasPrefix(filepath.Base(target.Path), "nvme") || strings.HasPrefix(filepath.Base(target.Path), "nvd") || strings.HasPrefix(filepath.Base(target.Path), "nda") {
|
|
result.Type = "nvme"
|
|
} else {
|
|
result.Type = "sata"
|
|
}
|
|
}
|
|
if result.Health == "" && result.Temperature == 0 && !result.Standby {
|
|
return nil, errSMARTDataUnavailable
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func applySMARTTextFallback(result *DiskSMART, fallback smartTextFallback) {
|
|
if result == nil {
|
|
return
|
|
}
|
|
if result.Model == "" && fallback.Model != "" {
|
|
result.Model = fallback.Model
|
|
}
|
|
if result.Serial == "" && fallback.Serial != "" {
|
|
result.Serial = fallback.Serial
|
|
}
|
|
if result.Type == "" && fallback.Type != "" {
|
|
result.Type = fallback.Type
|
|
}
|
|
if result.Health == "" && fallback.Health != "" {
|
|
result.Health = fallback.Health
|
|
}
|
|
if result.Temperature == 0 && fallback.Temperature > 0 {
|
|
result.Temperature = fallback.Temperature
|
|
}
|
|
if !result.Standby && fallback.Standby {
|
|
result.Standby = true
|
|
}
|
|
}
|
|
|
|
func parseSMARTTextFallback(text string) smartTextFallback {
|
|
var fallback smartTextFallback
|
|
for _, rawLine := range strings.Split(text, "\n") {
|
|
line := strings.TrimSpace(rawLine)
|
|
if line == "" {
|
|
continue
|
|
}
|
|
lower := strings.ToLower(line)
|
|
switch {
|
|
case strings.HasPrefix(lower, "device model:"):
|
|
fallback.Model = strings.TrimSpace(line[len("Device Model:"):])
|
|
case strings.HasPrefix(lower, "model number:"):
|
|
if fallback.Model == "" {
|
|
fallback.Model = strings.TrimSpace(line[len("Model Number:"):])
|
|
}
|
|
case strings.HasPrefix(lower, "product:"):
|
|
if fallback.Model == "" {
|
|
fallback.Model = strings.TrimSpace(line[len("Product:"):])
|
|
}
|
|
case strings.HasPrefix(lower, "serial number:"):
|
|
fallback.Serial = strings.TrimSpace(line[len("Serial Number:"):])
|
|
case strings.Contains(lower, "device is in standby mode"):
|
|
fallback.Standby = true
|
|
case strings.HasPrefix(lower, "smart overall-health self-assessment test result:"):
|
|
fallback.Health = parseSMARTHealthText(line)
|
|
case strings.HasPrefix(lower, "smart health status:"):
|
|
if fallback.Health == "" {
|
|
fallback.Health = parseSMARTHealthText(line)
|
|
}
|
|
case strings.Contains(lower, "transport protocol:") && strings.Contains(lower, "nvme"):
|
|
fallback.Type = "nvme"
|
|
case strings.Contains(lower, "transport protocol:") && strings.Contains(lower, "sas"):
|
|
fallback.Type = "sas"
|
|
case strings.Contains(lower, "sata version is:") || strings.Contains(lower, "ata version is:"):
|
|
if fallback.Type == "" {
|
|
fallback.Type = "sata"
|
|
}
|
|
}
|
|
|
|
if fallback.Temperature == 0 {
|
|
if matches := smartTextCurrentTempRE.FindStringSubmatch(line); len(matches) == 2 {
|
|
if temp, err := strconv.Atoi(matches[1]); err == nil && temp > 0 && temp < 150 {
|
|
fallback.Temperature = temp
|
|
continue
|
|
}
|
|
}
|
|
if matches := smartTextTemperatureRE.FindStringSubmatch(line); len(matches) == 2 && strings.Contains(lower, "celsius") {
|
|
if temp, err := strconv.Atoi(matches[1]); err == nil && temp > 0 && temp < 150 {
|
|
fallback.Temperature = temp
|
|
continue
|
|
}
|
|
}
|
|
if matches := smartTextTempAttributeRE.FindStringSubmatch(line); len(matches) == 3 {
|
|
if temp, err := strconv.Atoi(matches[2]); err == nil && temp > 0 && temp < 150 {
|
|
fallback.Temperature = temp
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return fallback
|
|
}
|
|
|
|
// parseSMARTHealthText maps a smartctl health line to "PASSED", "FAILED" or
// "" (unrecognised). Matching is case-insensitive.
//
// Failure is checked first and matches any occurrence of "fail", so that both
// the ATA wording ("... test result: FAILED!") and the SCSI wording
// ("SMART Health Status: FAILURE PREDICTION THRESHOLD EXCEEDED ...") are
// reported as FAILED rather than being dropped as unknown. A line containing
// "passed" or "ok" is reported as PASSED.
func parseSMARTHealthText(line string) string {
	lower := strings.ToLower(line)
	switch {
	case strings.Contains(lower, "fail"):
		return "FAILED"
	case strings.Contains(lower, "passed"), strings.Contains(lower, "ok"):
		return "PASSED"
	default:
		return ""
	}
}
|
|
|
|
// isStandbyPowerMode reports whether smartctl's power_mode string denotes a
// spun-down state, i.e. contains "standby" or "sleep" (case-insensitive).
func isStandbyPowerMode(powerMode string) bool {
	normalized := strings.ToLower(strings.TrimSpace(powerMode))
	for _, keyword := range [...]string{"standby", "sleep"} {
		if strings.Contains(normalized, keyword) {
			return true
		}
	}
	return false
}
|
|
|
|
// parseSMARTAttributes extracts normalized SMART attributes from smartctl JSON output.
|
|
func parseSMARTAttributes(data *smartctlJSON, diskType string) *SMARTAttributes {
|
|
attrs := &SMARTAttributes{}
|
|
hasData := false
|
|
|
|
if diskType == "nvme" {
|
|
nvmeLog := &data.NVMeSmartHealthInformationLog
|
|
// NVMe health log fields are always present when the log is available.
|
|
// We use simple heuristics: power_on_hours > 0 means the log was populated.
|
|
if nvmeLog.PowerOnHours > 0 || nvmeLog.PowerCycles > 0 || nvmeLog.AvailableSpare > 0 {
|
|
hasData = true
|
|
poh := nvmeLog.PowerOnHours
|
|
attrs.PowerOnHours = &poh
|
|
pc := nvmeLog.PowerCycles
|
|
attrs.PowerCycles = &pc
|
|
pu := nvmeLog.PercentageUsed
|
|
attrs.PercentageUsed = &pu
|
|
as := nvmeLog.AvailableSpare
|
|
attrs.AvailableSpare = &as
|
|
me := nvmeLog.MediaErrors
|
|
attrs.MediaErrors = &me
|
|
us := nvmeLog.UnsafeShutdowns
|
|
attrs.UnsafeShutdowns = &us
|
|
}
|
|
} else {
|
|
// SATA / SAS — iterate the ATA attributes table
|
|
for _, attr := range data.ATASmartAttributes.Table {
|
|
hasData = true
|
|
raw := parseRawValue(attr.Raw.String, attr.Raw.Value)
|
|
switch attr.ID {
|
|
case 5: // Reallocated Sector Count
|
|
v := raw
|
|
attrs.ReallocatedSectors = &v
|
|
case 9: // Power-On Hours
|
|
v := raw
|
|
attrs.PowerOnHours = &v
|
|
case 12: // Power Cycle Count
|
|
v := raw
|
|
attrs.PowerCycles = &v
|
|
case 197: // Current Pending Sector Count
|
|
v := raw
|
|
attrs.PendingSectors = &v
|
|
case 198: // Offline Uncorrectable
|
|
v := raw
|
|
attrs.OfflineUncorrectable = &v
|
|
case 199: // UDMA CRC Error Count
|
|
v := raw
|
|
attrs.UDMACRCErrors = &v
|
|
}
|
|
}
|
|
}
|
|
|
|
if !hasData {
|
|
return nil
|
|
}
|
|
return attrs
|
|
}
|
|
|
|
// parseRawValue returns the integer formed by the leading decimal digits of
// a SMART attribute's raw string, falling back to rawValue when the trimmed
// string is empty, does not start with a digit, or the digits do not fit in
// an int64.
//
// The string is preferred because some drives (notably Seagate) pack
// vendor-specific data into the upper bytes of the 48-bit raw value. For
// example Power_On_Hours may report raw.value=150323855943 while
// raw.string="16951 (223 173 0)", where only 16951 is the real hour count.
func parseRawValue(rawString string, rawValue int64) int64 {
	trimmed := strings.TrimSpace(rawString)

	digits := trimmed
	firstNonDigit := strings.IndexFunc(trimmed, func(r rune) bool { return r < '0' || r > '9' })
	if firstNonDigit >= 0 {
		digits = trimmed[:firstNonDigit]
	}
	if digits == "" {
		return rawValue
	}

	parsed, err := strconv.ParseInt(digits, 10, 64)
	if err != nil {
		return rawValue
	}
	return parsed
}
|
|
|
|
// detectDiskType determines the disk transport type from smartctl output.
|
|
func detectDiskType(data smartctlJSON) string {
|
|
protocol := strings.ToLower(data.Device.Protocol)
|
|
switch {
|
|
case strings.Contains(protocol, "nvme"):
|
|
return "nvme"
|
|
case strings.Contains(protocol, "sas"):
|
|
return "sas"
|
|
case strings.Contains(protocol, "ata"), strings.Contains(protocol, "sata"):
|
|
return "sata"
|
|
default:
|
|
// Try to infer from device type
|
|
devType := strings.ToLower(data.Device.Type)
|
|
if strings.Contains(devType, "nvme") {
|
|
return "nvme"
|
|
}
|
|
return "sata" // default
|
|
}
|
|
}
|
|
|
|
// formatWWN renders the three WWN components as lowercase hexadecimal
// without zero padding, joined by dashes: "naa-oui-id".
func formatWWN(naa, oui, id uint64) string {
	parts := [...]string{
		strconv.FormatUint(naa, 16),
		strconv.FormatUint(oui, 16),
		strconv.FormatUint(id, 16),
	}
	return strings.Join(parts[:], "-")
}
|