mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-09 19:32:24 +00:00
733 lines
21 KiB
Go
733 lines
21 KiB
Go
package hostagent
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
const (
	// smartctlComponent is the value of the "component" field attached to the
	// structured log events emitted by the SMART collector.
	smartctlComponent = "smartctl_collector"

	// maxCommandOutputBytes is the stdout limit (1 MiB) that
	// smartRunCommandOutput passes to runCommandOutputLimited.
	maxCommandOutputBytes = 1 << 20
)
|
|
|
|
var (
|
|
errCommandOutputTooLarge = errors.New("command output exceeds size limit")
|
|
errSMARTDataUnavailable = errors.New("smart data unavailable for device")
|
|
execLookPath = exec.LookPath
|
|
smartRunCommandOutput = func(ctx context.Context, name string, args ...string) ([]byte, error) {
|
|
return runCommandOutputLimited(ctx, maxCommandOutputBytes, name, args...)
|
|
}
|
|
readDir = os.ReadDir
|
|
osStat = os.Stat
|
|
|
|
timeNow = time.Now
|
|
runtimeGOOS = runtime.GOOS
|
|
)
|
|
|
|
// DiskSMART represents S.M.A.R.T. data for a single disk.
|
|
type DiskSMART struct {
|
|
Device string `json:"device"` // Device path (e.g., /dev/sda)
|
|
Model string `json:"model,omitempty"` // Disk model
|
|
Serial string `json:"serial,omitempty"` // Serial number
|
|
WWN string `json:"wwn,omitempty"` // World Wide Name
|
|
Type string `json:"type,omitempty"` // Transport type: sata, sas, nvme
|
|
Temperature int `json:"temperature"` // Temperature in Celsius
|
|
Health string `json:"health,omitempty"` // PASSED, FAILED, UNKNOWN
|
|
Standby bool `json:"standby,omitempty"` // True if disk was in standby
|
|
Attributes *SMARTAttributes `json:"attributes,omitempty"`
|
|
LastUpdated time.Time `json:"lastUpdated"` // When this reading was taken
|
|
}
|
|
|
|
// SMARTAttributes is the normalized set of SMART attributes shared by SATA
// and NVMe disks. Every field is a pointer so that a reported zero can be
// told apart from an attribute that was not reported at all.
type SMARTAttributes struct {
	// Attributes common to both disk families.

	PowerOnHours *int64 `json:"powerOnHours,omitempty"`
	PowerCycles  *int64 `json:"powerCycles,omitempty"`

	// SATA-only attributes, keyed by their ATA attribute ID.

	// ReallocatedSectors is ATA attribute 5.
	ReallocatedSectors *int64 `json:"reallocatedSectors,omitempty"`
	// PendingSectors is ATA attribute 197.
	PendingSectors *int64 `json:"pendingSectors,omitempty"`
	// OfflineUncorrectable is ATA attribute 198.
	OfflineUncorrectable *int64 `json:"offlineUncorrectable,omitempty"`
	// UDMACRCErrors is ATA attribute 199.
	UDMACRCErrors *int64 `json:"udmaCrcErrors,omitempty"`

	// NVMe-only attributes.

	PercentageUsed  *int   `json:"percentageUsed,omitempty"`
	AvailableSpare  *int   `json:"availableSpare,omitempty"`
	MediaErrors     *int64 `json:"mediaErrors,omitempty"`
	UnsafeShutdowns *int64 `json:"unsafeShutdowns,omitempty"`
}
|
|
|
|
// smartStatusJSON is the shape of a smartctl "smart_status" JSON object:
// a single boolean under the key "passed".
type smartStatusJSON struct {
	Passed bool `json:"passed"`
}
|
|
|
|
// nvmeSmartHealthInformationLogJSON decodes the
// "nvme_smart_health_information_log" object of smartctl's JSON output
// (see the field of that name on smartctlJSON).
type nvmeSmartHealthInformationLogJSON struct {
	// Temperature is used as a temperature fallback by collectDeviceSMART.
	Temperature int `json:"temperature"`
	// AvailableSpare and PercentageUsed are copied into SMARTAttributes
	// unchanged.
	AvailableSpare int `json:"available_spare"`
	PercentageUsed int `json:"percentage_used"`
	// Lifetime counters.
	PowerOnHours    int64 `json:"power_on_hours"`
	UnsafeShutdowns int64 `json:"unsafe_shutdowns"`
	MediaErrors     int64 `json:"media_errors"`
	PowerCycles     int64 `json:"power_cycles"`
}
|
|
|
|
// smartctlJSON represents the JSON output from smartctl --json.
|
|
// lsblkJSON is the JSON output from lsblk -J.
|
|
type lsblkJSON struct {
|
|
Blockdevices []lsblkDevice `json:"blockdevices"`
|
|
}
|
|
|
|
// lsblkDevice is one entry of the lsblk "blockdevices" array. The fields
// mirror the columns requested by listBlockDevicesLinux
// (NAME,TYPE,TRAN,MODEL,VENDOR,SUBSYSTEMS).
type lsblkDevice struct {
	// Name is the kernel device name without the /dev/ prefix.
	Name string `json:"name"`
	// Type is the lsblk device type; only "disk" is kept for SMART queries.
	Type string `json:"type"`
	// Tran is the transport reported by lsblk.
	Tran string `json:"tran"`
	// Model and Vendor are matched against linuxSMARTVirtualMetadataTokens.
	Model  string `json:"model"`
	Vendor string `json:"vendor"`
	// Subsystems is matched against linuxSMARTVirtualSubsystemTokens.
	Subsystems string `json:"subsystems"`
}
|
|
|
|
// linuxSMARTVirtualPrefixes lists device-name prefixes that mark a block
// device as virtual or logical, i.e. one that cannot provide SMART data.
var linuxSMARTVirtualPrefixes = []string{
	"dm-", "drbd", "loop", "md",
	"nbd", "pmem", "ram", "rbd",
	"vd", "xvd", "zd", "zram",
}
|
|
|
|
// linuxSMARTVirtualMetadataTokens lists lowercase substrings of a device's
// vendor/model text that identify a virtual disk, which cannot provide
// SMART data.
var linuxSMARTVirtualMetadataTokens = []string{
	"hyper-v", "msft virtual", "parallels",
	"qemu", "vbox", "virtual disk",
	"virtual hd", "virtualbox", "vmware",
}
|
|
|
|
// linuxSMARTVirtualSubsystemTokens lists lowercase substrings of the lsblk
// SUBSYSTEMS column that identify a virtual block device.
var linuxSMARTVirtualSubsystemTokens = []string{
	"drbd", "nbd", "vmbus",
	"virtio", "xen", "zfs",
}
|
|
|
|
type smartctlJSON struct {
|
|
Device struct {
|
|
Name string `json:"name"`
|
|
Type string `json:"type"`
|
|
Protocol string `json:"protocol"`
|
|
} `json:"device"`
|
|
ModelFamily string `json:"model_family"`
|
|
ModelName string `json:"model_name"`
|
|
SerialNumber string `json:"serial_number"`
|
|
WWN struct {
|
|
NAA uint64 `json:"naa"`
|
|
OUI uint64 `json:"oui"`
|
|
ID uint64 `json:"id"`
|
|
} `json:"wwn"`
|
|
SmartStatus *struct {
|
|
Passed bool `json:"passed"`
|
|
} `json:"smart_status,omitempty"`
|
|
Temperature struct {
|
|
Current int `json:"current"`
|
|
} `json:"temperature"`
|
|
// ATA SMART attributes table
|
|
ATASmartAttributes struct {
|
|
Table []struct {
|
|
ID int `json:"id"`
|
|
Name string `json:"name"`
|
|
Value int `json:"value"`
|
|
Worst int `json:"worst"`
|
|
Thresh int `json:"thresh"`
|
|
Raw struct {
|
|
Value int64 `json:"value"`
|
|
String string `json:"string"`
|
|
} `json:"raw"`
|
|
} `json:"table"`
|
|
} `json:"ata_smart_attributes"`
|
|
// NVMe-specific health information
|
|
NVMeSmartHealthInformationLog *nvmeSmartHealthInformationLogJSON `json:"nvme_smart_health_information_log"`
|
|
PowerMode string `json:"power_mode"`
|
|
}
|
|
|
|
// CollectSMARTLocal collects S.M.A.R.T. data from all local block devices.
|
|
// The diskExclude parameter specifies patterns for devices to skip (e.g., "sda", "/dev/nvme*", "*cache*").
|
|
func CollectSMARTLocal(ctx context.Context, diskExclude []string) ([]DiskSMART, error) {
|
|
// List block devices
|
|
devices, err := listBlockDevices(ctx, diskExclude)
|
|
if err != nil {
|
|
log.Debug().Err(err).Msg("failed to list block devices for SMART collection")
|
|
return nil, fmt.Errorf("list block devices for SMART collection: %w", err)
|
|
}
|
|
|
|
if len(devices) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
var results []DiskSMART
|
|
for _, dev := range devices {
|
|
smart, err := collectDeviceSMART(ctx, dev)
|
|
if err != nil {
|
|
if errors.Is(err, errSMARTDataUnavailable) {
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "skip_no_smart_data").
|
|
Str("device", dev).
|
|
Msg("Device returned no usable SMART data, skipping")
|
|
} else {
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "collect_device_smart_failed").
|
|
Str("device", dev).
|
|
Err(err).
|
|
Msg("Failed to collect SMART data for device")
|
|
}
|
|
continue
|
|
}
|
|
if smart != nil {
|
|
results = append(results, *smart)
|
|
}
|
|
}
|
|
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "collect_local_complete").
|
|
Int("devices_discovered", len(devices)).
|
|
Int("devices_collected", len(results)).
|
|
Msg("Completed SMART collection for local devices")
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// listBlockDevices returns a list of block devices suitable for SMART queries.
|
|
// Devices matching any of the diskExclude patterns are skipped.
|
|
func listBlockDevices(ctx context.Context, diskExclude []string) ([]string, error) {
|
|
if runtimeGOOS == "freebsd" {
|
|
devices, err := listBlockDevicesFreeBSD(ctx, diskExclude)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("list FreeBSD block devices: %w", err)
|
|
}
|
|
return devices, nil
|
|
}
|
|
devices, err := listBlockDevicesLinux(ctx, diskExclude)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("list Linux block devices: %w", err)
|
|
}
|
|
return devices, nil
|
|
}
|
|
|
|
// listBlockDevicesLinux uses lsblk JSON output to find physical disks on Linux,
|
|
// filtering out virtual/logical devices that cannot provide SMART data.
|
|
func listBlockDevicesLinux(ctx context.Context, diskExclude []string) ([]string, error) {
|
|
output, err := smartRunCommandOutput(ctx, "lsblk", "-J", "-d", "-o", "NAME,TYPE,TRAN,MODEL,VENDOR,SUBSYSTEMS")
|
|
if err != nil {
|
|
// Fall back to sysfs enumeration for minimal hosts/images where lsblk is unavailable.
|
|
log.Debug().Err(err).Msg("lsblk device discovery failed, falling back to /sys/block")
|
|
devices, fallbackErr := listBlockDevicesLinuxSysfs(diskExclude)
|
|
if fallbackErr != nil {
|
|
return nil, fmt.Errorf("linux block-device discovery failed: lsblk error: %w; /sys/block fallback error: %v", err, fallbackErr)
|
|
}
|
|
return devices, nil
|
|
}
|
|
|
|
var data lsblkJSON
|
|
if err := json.Unmarshal(output, &data); err != nil {
|
|
return nil, fmt.Errorf("parse lsblk JSON: %w", err)
|
|
}
|
|
|
|
var devices []string
|
|
for _, disk := range data.Blockdevices {
|
|
if strings.TrimSpace(disk.Name) == "" {
|
|
continue
|
|
}
|
|
|
|
devicePath := "/dev/" + disk.Name
|
|
if reason := linuxSMARTSkipReason(disk); reason != "" {
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "skip_virtual_device").
|
|
Str("device", devicePath).
|
|
Str("reason", reason).
|
|
Msg("Skipping non-physical device for SMART collection")
|
|
continue
|
|
}
|
|
if matchesDeviceExclude(disk.Name, devicePath, diskExclude) {
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "skip_excluded_device").
|
|
Str("device", devicePath).
|
|
Msg("Skipping excluded device for SMART collection")
|
|
continue
|
|
}
|
|
devices = append(devices, devicePath)
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
// linuxSMARTSkipReason returns a human-readable reason if the device should be
|
|
// skipped for SMART collection, or "" if the device is a real physical disk.
|
|
func linuxSMARTSkipReason(device lsblkDevice) string {
|
|
if !strings.EqualFold(strings.TrimSpace(device.Type), "disk") {
|
|
return "not a whole disk"
|
|
}
|
|
|
|
name := strings.ToLower(strings.TrimSpace(device.Name))
|
|
for _, prefix := range linuxSMARTVirtualPrefixes {
|
|
if strings.HasPrefix(name, prefix) {
|
|
return "virtual/logical device prefix"
|
|
}
|
|
}
|
|
|
|
transport := strings.ToLower(strings.TrimSpace(device.Tran))
|
|
if transport == "virtio" {
|
|
return "virtio transport"
|
|
}
|
|
|
|
subsystems := strings.ToLower(strings.TrimSpace(device.Subsystems))
|
|
for _, token := range linuxSMARTVirtualSubsystemTokens {
|
|
if strings.Contains(subsystems, token) {
|
|
return "virtual/logical subsystem"
|
|
}
|
|
}
|
|
|
|
metadata := strings.ToLower(strings.TrimSpace(device.Vendor + " " + device.Model))
|
|
for _, token := range linuxSMARTVirtualMetadataTokens {
|
|
if strings.Contains(metadata, token) {
|
|
return "virtual disk model/vendor signature"
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func listBlockDevicesLinuxSysfs(diskExclude []string) ([]string, error) {
|
|
entries, err := readDir("/sys/block")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var devices []string
|
|
for _, entry := range entries {
|
|
name := strings.TrimSpace(entry.Name())
|
|
if name == "" || isVirtualLinuxBlockDevice(name) {
|
|
continue
|
|
}
|
|
|
|
// Keep only concrete device-backed entries and skip synthetic block devices.
|
|
if _, err := osStat(filepath.Join("/sys/block", name, "device")); err != nil {
|
|
continue
|
|
}
|
|
|
|
devicePath := "/dev/" + name
|
|
if matchesDeviceExclude(name, devicePath, diskExclude) {
|
|
log.Debug().Str("device", devicePath).Msg("Skipping excluded device for SMART collection")
|
|
continue
|
|
}
|
|
devices = append(devices, devicePath)
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func isVirtualLinuxBlockDevice(name string) bool {
|
|
for _, prefix := range linuxSMARTVirtualPrefixes {
|
|
if strings.HasPrefix(name, prefix) {
|
|
return true
|
|
}
|
|
}
|
|
return strings.HasPrefix(name, "fd") ||
|
|
strings.HasPrefix(name, "sr")
|
|
}
|
|
|
|
// listBlockDevicesFreeBSD uses sysctl kern.disks to find disks on FreeBSD.
|
|
func listBlockDevicesFreeBSD(ctx context.Context, diskExclude []string) ([]string, error) {
|
|
output, err := smartRunCommandOutput(ctx, "sysctl", "-n", "kern.disks")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("run sysctl kern.disks: %w", err)
|
|
}
|
|
|
|
var devices []string
|
|
for _, name := range strings.Fields(strings.TrimSpace(string(output))) {
|
|
if name == "" {
|
|
continue
|
|
}
|
|
devicePath := "/dev/" + name
|
|
if matchesDeviceExclude(name, devicePath, diskExclude) {
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "skip_excluded_device").
|
|
Str("device", devicePath).
|
|
Msg("Skipping excluded device for SMART collection")
|
|
continue
|
|
}
|
|
devices = append(devices, devicePath)
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
// matchesDeviceExclude reports whether a block device is covered by any of
// the exclusion patterns. Each pattern is whitespace-trimmed (blank patterns
// are ignored) and tested against both the bare device name (e.g. "sda",
// "nvme0n1") and the full path (e.g. "/dev/sda"). Three forms are recognised:
//   - Exact:    "sda" matches the device named "sda"
//   - Prefix:   "nvme*" matches "nvme0n1", "nvme1n1", etc.
//   - Contains: "*cache*" matches any device with "cache" in its name or path
//
// A pattern with a leading "*" only (e.g. "*sda") is not a wildcard form and
// is compared literally.
func matchesDeviceExclude(name, devicePath string, excludePatterns []string) bool {
	for _, rawPattern := range excludePatterns {
		pat := strings.TrimSpace(rawPattern)
		if pat == "" {
			continue
		}

		var hit bool
		switch {
		case len(pat) > 2 && pat[0] == '*' && pat[len(pat)-1] == '*':
			// *substring*
			needle := pat[1 : len(pat)-1]
			hit = strings.Contains(name, needle) || strings.Contains(devicePath, needle)
		case strings.HasSuffix(pat, "*"):
			// prefix* (a lone "*" has an empty prefix and matches everything)
			stem := pat[:len(pat)-1]
			hit = strings.HasPrefix(name, stem) || strings.HasPrefix(devicePath, stem)
		default:
			hit = pat == name || pat == devicePath
		}

		if hit {
			return true
		}
	}
	return false
}
|
|
|
|
// collectDeviceSMART runs smartctl on a single device and parses the result.
|
|
func collectDeviceSMART(ctx context.Context, device string) (*DiskSMART, error) {
|
|
// Use timeout to avoid hanging on slow/unresponsive disks
|
|
cmdCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
|
defer cancel()
|
|
|
|
// Check if smartctl is available
|
|
smartctlPath, err := execLookPath("smartctl")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("look up smartctl binary: %w", err)
|
|
}
|
|
|
|
// Run smartctl with standby check to avoid waking sleeping drives
|
|
// -n standby: don't check if drive is in standby (return exit code 2)
|
|
// -i: device info
|
|
// -A: attributes (for temperature)
|
|
// --json=o: output original smartctl JSON format
|
|
output, err := smartRunCommandOutput(cmdCtx, smartctlPath, "-n", "standby", "-i", "-A", "-H", "--json=o", device)
|
|
|
|
// smartctl returns non-zero exit codes for various conditions
|
|
// Exit code 2 means drive is in standby - that's okay
|
|
if err != nil {
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
exitCode := exitErr.ExitCode()
|
|
// Check for standby (bit 1 set in exit status)
|
|
if exitCode&2 != 0 {
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "device_in_standby").
|
|
Str("device", filepath.Base(device)).
|
|
Msg("Skipping SMART collection for standby device")
|
|
return &DiskSMART{
|
|
Device: filepath.Base(device),
|
|
Standby: true,
|
|
LastUpdated: timeNow(),
|
|
}, nil
|
|
}
|
|
// Other exit codes might still have valid JSON output
|
|
// Continue parsing if we got output
|
|
if len(output) == 0 {
|
|
return nil, fmt.Errorf("run smartctl for %s: %w", device, err)
|
|
}
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "collect_device_smart_nonzero_exit").
|
|
Str("device", filepath.Base(device)).
|
|
Int("exit_code", exitCode).
|
|
Msg("smartctl returned non-zero exit status with JSON output")
|
|
} else {
|
|
return nil, fmt.Errorf("run smartctl for %s: %w", device, err)
|
|
}
|
|
}
|
|
|
|
// Parse JSON output
|
|
var smartData smartctlJSON
|
|
if err := json.Unmarshal(output, &smartData); err != nil {
|
|
return nil, fmt.Errorf("parse smartctl JSON for %s: %w", device, err)
|
|
}
|
|
|
|
result := &DiskSMART{
|
|
Device: filepath.Base(device),
|
|
Model: smartData.ModelName,
|
|
Serial: smartData.SerialNumber,
|
|
Type: detectDiskType(smartData),
|
|
LastUpdated: timeNow(),
|
|
}
|
|
|
|
// Build WWN string if available
|
|
if smartData.WWN.NAA != 0 {
|
|
result.WWN = formatWWN(smartData.WWN.NAA, smartData.WWN.OUI, smartData.WWN.ID)
|
|
}
|
|
|
|
// Get temperature (different location for NVMe vs SATA).
|
|
// Try top-level fields first, then fall back to ATA attributes 194/190.
|
|
if smartData.Temperature.Current > 0 {
|
|
result.Temperature = smartData.Temperature.Current
|
|
} else if smartData.NVMeSmartHealthInformationLog != nil && smartData.NVMeSmartHealthInformationLog.Temperature > 0 {
|
|
result.Temperature = smartData.NVMeSmartHealthInformationLog.Temperature
|
|
} else {
|
|
// Fallback: extract from ATA SMART attributes 194 (Temperature_Celsius)
|
|
// or 190 (Airflow_Temperature_Cel). Some drives don't populate the
|
|
// top-level temperature field.
|
|
for _, attr := range smartData.ATASmartAttributes.Table {
|
|
if attr.ID == 194 || attr.ID == 190 {
|
|
temp := int(parseRawValue(attr.Raw.String, attr.Raw.Value))
|
|
if temp > 0 && temp < 150 { // sanity: valid operating range
|
|
result.Temperature = temp
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get health status
|
|
if smartData.SmartStatus == nil {
|
|
result.Health = "UNKNOWN"
|
|
} else if smartData.SmartStatus.Passed {
|
|
result.Health = "PASSED"
|
|
} else {
|
|
result.Health = "FAILED"
|
|
}
|
|
|
|
// Parse SMART attributes
|
|
result.Attributes = parseSMARTAttributes(&smartData, result.Type)
|
|
|
|
// If the device returned no useful data at all, treat as unavailable
|
|
// rather than reporting a misleading UNKNOWN/FAILED entry.
|
|
if (result.Health == "" || result.Health == "UNKNOWN") && result.Temperature == 0 && result.Attributes == nil {
|
|
return nil, errSMARTDataUnavailable
|
|
}
|
|
|
|
log.Debug().
|
|
Str("component", smartctlComponent).
|
|
Str("action", "collect_device_smart_success").
|
|
Str("device", result.Device).
|
|
Str("type", result.Type).
|
|
Str("model", result.Model).
|
|
Int("temperature", result.Temperature).
|
|
Str("health", result.Health).
|
|
Msg("collected SMART data")
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// parseSMARTAttributes extracts normalized SMART attributes from smartctl JSON output.
|
|
func parseSMARTAttributes(data *smartctlJSON, diskType string) *SMARTAttributes {
|
|
attrs := &SMARTAttributes{}
|
|
hasData := false
|
|
|
|
if diskType == "nvme" {
|
|
nvmeLog := data.NVMeSmartHealthInformationLog
|
|
if nvmeLog != nil {
|
|
hasData = true
|
|
poh := nvmeLog.PowerOnHours
|
|
attrs.PowerOnHours = &poh
|
|
pc := nvmeLog.PowerCycles
|
|
attrs.PowerCycles = &pc
|
|
pu := nvmeLog.PercentageUsed
|
|
attrs.PercentageUsed = &pu
|
|
as := nvmeLog.AvailableSpare
|
|
attrs.AvailableSpare = &as
|
|
me := nvmeLog.MediaErrors
|
|
attrs.MediaErrors = &me
|
|
us := nvmeLog.UnsafeShutdowns
|
|
attrs.UnsafeShutdowns = &us
|
|
}
|
|
} else {
|
|
// SATA / SAS — iterate the ATA attributes table
|
|
for _, attr := range data.ATASmartAttributes.Table {
|
|
hasData = true
|
|
raw := parseRawValue(attr.Raw.String, attr.Raw.Value)
|
|
switch attr.ID {
|
|
case 5: // Reallocated Sector Count
|
|
v := raw
|
|
attrs.ReallocatedSectors = &v
|
|
case 9: // Power-On Hours
|
|
v := raw
|
|
attrs.PowerOnHours = &v
|
|
case 12: // Power Cycle Count
|
|
v := raw
|
|
attrs.PowerCycles = &v
|
|
case 197: // Current Pending Sector Count
|
|
v := raw
|
|
attrs.PendingSectors = &v
|
|
case 198: // Offline Uncorrectable
|
|
v := raw
|
|
attrs.OfflineUncorrectable = &v
|
|
case 199: // UDMA CRC Error Count
|
|
v := raw
|
|
attrs.UDMACRCErrors = &v
|
|
}
|
|
}
|
|
}
|
|
|
|
if !hasData {
|
|
return nil
|
|
}
|
|
return attrs
|
|
}
|
|
|
|
// parseRawValue extracts the primary integer from a SMART attribute's raw
// string.
//
// Some drives (notably Seagate) pack vendor-specific data in the upper bytes
// of the 48-bit raw value, making raw.value unreliable. For example,
// Power_On_Hours may report raw.value=150323855943 while
// raw.string="16951 (223 173 0)", where only 16951 is the actual hours. The
// leading run of decimal digits in rawString is therefore preferred.
//
// rawValue is returned instead when rawString is blank, does not start with
// a digit, is a hexadecimal literal ("0x..."; its leading "0" is a radix
// prefix, not the value), or holds a number that does not fit in an int64.
func parseRawValue(rawString string, rawValue int64) int64 {
	s := strings.TrimSpace(rawString)

	digits := 0
	for digits < len(s) && s[digits] >= '0' && s[digits] <= '9' {
		digits++
	}
	if digits == 0 {
		return rawValue
	}

	// "0x1A2B" would otherwise parse as 0 and silently discard the value.
	if digits == 1 && s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X') {
		return rawValue
	}

	v, err := strconv.ParseInt(s[:digits], 10, 64)
	if err != nil {
		return rawValue
	}
	return v
}
|
|
|
|
// detectDiskType determines the disk transport type from smartctl output.
|
|
func detectDiskType(data smartctlJSON) string {
|
|
protocol := strings.ToLower(data.Device.Protocol)
|
|
switch {
|
|
case strings.Contains(protocol, "nvme"):
|
|
return "nvme"
|
|
case strings.Contains(protocol, "sas"):
|
|
return "sas"
|
|
case strings.Contains(protocol, "ata"), strings.Contains(protocol, "sata"):
|
|
return "sata"
|
|
default:
|
|
// Try to infer from device type
|
|
devType := strings.ToLower(data.Device.Type)
|
|
if strings.Contains(devType, "nvme") {
|
|
return "nvme"
|
|
}
|
|
return "sata" // default
|
|
}
|
|
}
|
|
|
|
// formatWWN renders the three WWN components as lowercase hexadecimal,
// without padding, joined by dashes: "naa-oui-id".
func formatWWN(naa, oui, id uint64) string {
	// Three uint64 values are at most 16 hex digits each, plus two dashes.
	buf := make([]byte, 0, 3*16+2)
	buf = strconv.AppendUint(buf, naa, 16)
	buf = append(buf, '-')
	buf = strconv.AppendUint(buf, oui, 16)
	buf = append(buf, '-')
	buf = strconv.AppendUint(buf, id, 16)
	return string(buf)
}
|
|
|
|
func runCommandOutputLimited(ctx context.Context, maxBytes int, name string, args ...string) ([]byte, error) {
|
|
if maxBytes <= 0 {
|
|
return nil, fmt.Errorf("max bytes must be positive")
|
|
}
|
|
|
|
cmd := exec.CommandContext(ctx, name, args...)
|
|
cmd.Stderr = io.Discard
|
|
|
|
stdout, err := cmd.StdoutPipe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
output := make([]byte, 0, 4096)
|
|
buf := make([]byte, 32*1024)
|
|
exceeded := false
|
|
|
|
for {
|
|
n, readErr := stdout.Read(buf)
|
|
if n > 0 {
|
|
remaining := maxBytes - len(output)
|
|
if remaining > 0 {
|
|
if n <= remaining {
|
|
output = append(output, buf[:n]...)
|
|
} else {
|
|
output = append(output, buf[:remaining]...)
|
|
exceeded = true
|
|
}
|
|
} else {
|
|
exceeded = true
|
|
}
|
|
|
|
if exceeded && cmd.Process != nil {
|
|
_ = cmd.Process.Kill()
|
|
}
|
|
}
|
|
|
|
if readErr == io.EOF {
|
|
break
|
|
}
|
|
if readErr != nil {
|
|
_ = cmd.Wait()
|
|
return output, readErr
|
|
}
|
|
}
|
|
|
|
waitErr := cmd.Wait()
|
|
if exceeded {
|
|
return nil, fmt.Errorf("%w (%d bytes)", errCommandOutputTooLarge, maxBytes)
|
|
}
|
|
if waitErr != nil {
|
|
return output, waitErr
|
|
}
|
|
|
|
return output, nil
|
|
}
|