Handle standby SMART temps and capture disk identity

This commit is contained in:
rcourtman 2025-11-22 07:35:13 +00:00
parent 78ffb14493
commit 596bdbfb13
3 changed files with 121 additions and 7 deletions

View file

@ -358,6 +358,56 @@ func (tc *TemperatureCollector) disableLegacySSHOnAuthFailure(err error, nodeNam
return true
}
// smartEntryRaw is the wire-level shape of one element of the "smart" array
// in the sensor wrapper's JSON output. It is decoded as-is and then converted
// to models.DiskTemp by normalizeSMARTEntries.
type smartEntryRaw struct {
	// Device is the disk's device path (e.g. "/dev/sda").
	Device string `json:"device"`

	// Identity fields; each is optional in the payload.
	Serial string `json:"serial,omitempty"`
	WWN    string `json:"wwn,omitempty"`
	Model  string `json:"model,omitempty"`
	Type   string `json:"type,omitempty"`

	// Temperature is a pointer so that a JSON null (or a missing key) decodes
	// to nil and can be told apart from an explicit numeric reading.
	Temperature *int `json:"temperature"`

	// LastUpdated is kept as the raw string from the payload;
	// normalizeSMARTEntries parses it as RFC 3339.
	LastUpdated string `json:"lastUpdated,omitempty"`

	// StandbySkipped mirrors the payload's "standbySkipped" flag.
	StandbySkipped bool `json:"standbySkipped,omitempty"`
}
// normalizeSMARTEntries converts decoded wrapper SMART entries into
// models.DiskTemp values.
//
// Rules applied per entry:
//   - string fields are whitespace-trimmed;
//   - entries whose trimmed device is empty are dropped;
//   - a nil temperature becomes 0;
//   - lastUpdated is parsed as RFC 3339, and is left as the zero time when it
//     is empty or does not parse.
//
// A nil or empty input yields nil. Otherwise the result preserves input order.
func normalizeSMARTEntries(raw []smartEntryRaw) []models.DiskTemp {
	if len(raw) == 0 {
		return nil
	}

	disks := make([]models.DiskTemp, 0, len(raw))
	for i := range raw {
		item := &raw[i]

		device := strings.TrimSpace(item.Device)
		if device == "" {
			// Nothing to key the reading on; skip it.
			continue
		}

		disk := models.DiskTemp{
			Device:         device,
			Serial:         strings.TrimSpace(item.Serial),
			WWN:            strings.TrimSpace(item.WWN),
			Model:          strings.TrimSpace(item.Model),
			Type:           strings.TrimSpace(item.Type),
			StandbySkipped: item.StandbySkipped,
		}

		// Temperature stays at its zero value when the payload had null.
		if item.Temperature != nil {
			disk.Temperature = *item.Temperature
		}

		// Timestamp parsing is best-effort: a bad value leaves the zero time.
		if item.LastUpdated != "" {
			if ts, err := time.Parse(time.RFC3339, item.LastUpdated); err == nil {
				disk.LastUpdated = ts
			}
		}

		disks = append(disks, disk)
	}

	return disks
}
// parseSensorsJSON parses the JSON output from the sensor wrapper
func (tc *TemperatureCollector) parseSensorsJSON(jsonStr string) (*models.Temperature, error) {
if strings.TrimSpace(jsonStr) == "" {
@ -368,19 +418,18 @@ func (tc *TemperatureCollector) parseSensorsJSON(jsonStr string) (*models.Temper
// Fall back to legacy format for backward compatibility
var wrapperData struct {
Sensors map[string]interface{} `json:"sensors"`
SMART []models.DiskTemp `json:"smart"`
SMART []smartEntryRaw `json:"smart"`
}
var sensorsData map[string]interface{}
var smartData []models.DiskTemp
var smartRaw []smartEntryRaw
var parsedWrapper bool
if err := json.Unmarshal([]byte(jsonStr), &wrapperData); err == nil && wrapperData.Sensors != nil {
// New wrapper format
sensorsData = wrapperData.Sensors
smartData = wrapperData.SMART
log.Debug().
Int("smartDisks", len(smartData)).
Msg("Parsed new wrapper format with SMART data")
smartRaw = wrapperData.SMART
parsedWrapper = true
} else {
// Legacy format: direct sensors -j output
if err := json.Unmarshal([]byte(jsonStr), &sensorsData); err != nil {
@ -389,6 +438,13 @@ func (tc *TemperatureCollector) parseSensorsJSON(jsonStr string) (*models.Temper
log.Debug().Msg("Parsed legacy sensors format (no SMART data)")
}
smartData := normalizeSMARTEntries(smartRaw)
if parsedWrapper {
log.Debug().
Int("smartDisks", len(smartData)).
Msg("Parsed new wrapper format with SMART data")
}
temp := &models.Temperature{
Cores: []models.CoreTemp{},
NVMe: []models.NVMeTemp{},

View file

@ -186,6 +186,63 @@ func TestParseSensorsJSON_RPiWrapper(t *testing.T) {
}
}
// TestParseSensorsJSON_SMARTWithNullTemperature feeds parseSensorsJSON a
// wrapper payload with two SMART entries: one with a full set of identity
// fields and a numeric temperature, and one with a null temperature and
// standbySkipped set. It verifies that both entries survive parsing, that the
// identity fields and timestamp of the first are carried through, and that the
// null temperature is reported as 0 with StandbySkipped true.
func TestParseSensorsJSON_SMARTWithNullTemperature(t *testing.T) {
	collector := &TemperatureCollector{}

	// Truncate to whole seconds so the value round-trips exactly through the
	// RFC 3339 string embedded in the payload.
	wantUpdated := time.Now().UTC().Truncate(time.Second)

	jsonStr := fmt.Sprintf(`{
		"sensors": {
			"coretemp-isa-0000": {
				"Package id 0": {"temp1_input": 55.0}
			}
		},
		"smart": [
			{
				"device": "/dev/sda",
				"serial": "S1",
				"wwn": "WWN1",
				"model": "Model1",
				"type": "sat",
				"temperature": 34,
				"lastUpdated": "%s",
				"standbySkipped": false
			},
			{
				"device": "/dev/zd0",
				"temperature": null,
				"standbySkipped": true
			}
		]
	}`, wantUpdated.Format(time.RFC3339))

	temp, err := collector.parseSensorsJSON(jsonStr)
	if err != nil {
		t.Fatalf("unexpected error parsing SMART wrapper output: %v", err)
	}
	if temp == nil || !temp.Available {
		t.Fatal("expected temperature data to be available when SMART data present")
	}
	if !temp.HasSMART {
		t.Fatal("expected HasSMART to be true when SMART data present")
	}
	if len(temp.SMART) != 2 {
		t.Fatalf("expected two SMART entries, got %d", len(temp.SMART))
	}

	// First entry: awake disk with identity data and a real reading.
	active := temp.SMART[0]
	identity := []struct {
		field, got, want string
	}{
		{"Device", active.Device, "/dev/sda"},
		{"Serial", active.Serial, "S1"},
		{"WWN", active.WWN, "WWN1"},
		{"Model", active.Model, "Model1"},
		{"Type", active.Type, "sat"},
	}
	for _, c := range identity {
		if c.got != c.want {
			t.Errorf("first SMART entry %s = %q, want %q", c.field, c.got, c.want)
		}
	}
	if active.Temperature != 34 {
		t.Fatalf("expected first SMART temperature 34, got %d", active.Temperature)
	}
	if active.LastUpdated.IsZero() {
		t.Fatal("expected first SMART entry to include parsed lastUpdated timestamp")
	}
	if !active.LastUpdated.Equal(wantUpdated) {
		t.Errorf("first SMART entry LastUpdated = %v, want %v", active.LastUpdated, wantUpdated)
	}
	if active.StandbySkipped {
		t.Error("expected standbySkipped to be false for first SMART entry")
	}

	// Second entry: null temperature with the standby flag set.
	standby := temp.SMART[1]
	if standby.Device != "/dev/zd0" {
		t.Errorf("second SMART entry Device = %q, want %q", standby.Device, "/dev/zd0")
	}
	if standby.Temperature != 0 {
		t.Fatalf("expected standby SMART entry to default to temperature 0, got %d", standby.Temperature)
	}
	if !standby.StandbySkipped {
		t.Fatal("expected standbySkipped to be true for second SMART entry")
	}
}
func TestShouldDisableProxy(t *testing.T) {
collector := &TemperatureCollector{}

View file

@ -2483,11 +2483,12 @@ refresh_smart_cache() {
for dev in "${disks[@]}"; do
# Use smartctl with standby check to avoid waking sleeping drives
# -n standby: skip if drive is in standby/sleep mode
# -i: include identity data (serial/WWN/model)
# --json=o: output original smartctl JSON format
# timeout: prevent hanging on problematic drives
local output
if output=$(timeout ${MAX_SMARTCTL_TIME}s smartctl -n standby -A --json=o "$dev" 2>/dev/null); then
if output=$(timeout ${MAX_SMARTCTL_TIME}s smartctl -n standby -i -A --json=o "$dev" 2>/dev/null); then
# Parse the JSON output
local temp=$(echo "$output" | jq -r '
.temperature.current //