mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-01 04:50:16 +00:00
Implements comprehensive mdadm RAID array monitoring for Linux hosts via pulse-host-agent. Arrays are automatically detected and monitored with real-time status updates, rebuild progress tracking, and automatic alerting for degraded or failed arrays.

Key changes:

**Backend:**
- Add mdadm package for parsing mdadm --detail output
- Extend host agent report structure with RAID array data
- Integrate mdadm collection into host agent (Linux-only, best-effort)
- Add RAID array processing in monitoring system
- Implement automatic alerting:
  - Critical alerts for degraded arrays or arrays with failed devices
  - Warning alerts for rebuilding/resyncing arrays with progress tracking
  - Auto-clear alerts when arrays return to healthy state

**Frontend:**
- Add TypeScript types for RAID arrays and devices
- Display RAID arrays in host details drawer with:
  - Array status (clean/degraded/recovering) with color-coded indicators
  - Device counts (active/total/failed/spare)
  - Rebuild progress percentage and speed when applicable
  - Green for healthy, amber for rebuilding, red for degraded

**Documentation:**
- Document mdadm monitoring feature in HOST_AGENT.md
- Explain requirements (Linux, mdadm installed, root access)
- Clarify scope (software RAID only, hardware RAID not supported)

**Testing:**
- Add comprehensive tests for mdadm output parsing
- Test parsing of healthy, degraded, and rebuilding arrays
- Verify proper extraction of device states and rebuild progress

All builds pass successfully. RAID monitoring is automatic and best-effort - if mdadm is not installed or no arrays exist, host agent continues reporting other metrics normally.

Related to #676
228 lines
7.1 KiB
Go
228 lines
7.1 KiB
Go
package mdadm
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/pkg/agents/host"
|
|
)
|
|
|
|
func TestParseDetail(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
device string
|
|
output string
|
|
want host.RAIDArray
|
|
wantErr bool
|
|
}{
|
|
{
|
|
name: "RAID1 healthy array",
|
|
device: "/dev/md0",
|
|
output: `/dev/md0:
|
|
Version : 1.2
|
|
Creation Time : Thu Jan 15 10:00:00 2025
|
|
Raid Level : raid1
|
|
Array Size : 102400000 (97.66 GiB 104.86 GB)
|
|
Used Dev Size : 102400000 (97.66 GiB 104.86 GB)
|
|
Raid Devices : 2
|
|
Total Devices : 2
|
|
Persistence : Superblock is persistent
|
|
|
|
Update Time : Thu Jan 16 12:00:00 2025
|
|
State : clean
|
|
Active Devices : 2
|
|
Working Devices : 2
|
|
Failed Devices : 0
|
|
Spare Devices : 0
|
|
|
|
Consistency Policy : resync
|
|
|
|
Name : server:0
|
|
UUID : 12345678:90abcdef:12345678:90abcdef
|
|
|
|
Number Major Minor RaidDevice State
|
|
0 8 1 0 active sync /dev/sda1
|
|
1 8 17 1 active sync /dev/sdb1`,
|
|
want: host.RAIDArray{
|
|
Device: "/dev/md0",
|
|
Name: "server:0",
|
|
Level: "raid1",
|
|
State: "clean",
|
|
TotalDevices: 2,
|
|
ActiveDevices: 2,
|
|
WorkingDevices: 2,
|
|
FailedDevices: 0,
|
|
SpareDevices: 0,
|
|
UUID: "12345678:90abcdef:12345678:90abcdef",
|
|
Devices: []host.RAIDDevice{
|
|
{Device: "/dev/sda1", State: "active sync", Slot: 0},
|
|
{Device: "/dev/sdb1", State: "active sync", Slot: 1},
|
|
},
|
|
},
|
|
},
|
|
{
|
|
name: "RAID5 degraded array",
|
|
device: "/dev/md1",
|
|
output: `/dev/md1:
|
|
Version : 1.2
|
|
Creation Time : Wed Jan 14 08:00:00 2025
|
|
Raid Level : raid5
|
|
Array Size : 204800000 (195.31 GiB 209.72 GB)
|
|
Used Dev Size : 102400000 (97.66 GiB 104.86 GB)
|
|
Raid Devices : 3
|
|
Total Devices : 2
|
|
Persistence : Superblock is persistent
|
|
|
|
Update Time : Thu Jan 16 12:30:00 2025
|
|
State : clean, degraded
|
|
Active Devices : 2
|
|
Working Devices : 2
|
|
Failed Devices : 1
|
|
Spare Devices : 0
|
|
|
|
Name : server:1
|
|
UUID : abcdef12:34567890:abcdef12:34567890
|
|
|
|
Number Major Minor RaidDevice State
|
|
0 8 1 0 active sync /dev/sda1
|
|
- 0 0 1 removed
|
|
2 8 33 2 active sync /dev/sdc1
|
|
|
|
1 8 17 - faulty /dev/sdb1`,
|
|
want: host.RAIDArray{
|
|
Device: "/dev/md1",
|
|
Name: "server:1",
|
|
Level: "raid5",
|
|
State: "clean, degraded",
|
|
TotalDevices: 2,
|
|
ActiveDevices: 2,
|
|
WorkingDevices: 2,
|
|
FailedDevices: 1,
|
|
SpareDevices: 0,
|
|
UUID: "abcdef12:34567890:abcdef12:34567890",
|
|
Devices: []host.RAIDDevice{
|
|
{Device: "/dev/sda1", State: "active sync", Slot: 0},
|
|
{Device: "/dev/sdc1", State: "active sync", Slot: 2},
|
|
{Device: "/dev/sdb1", State: "faulty", Slot: -1},
|
|
},
|
|
},
|
|
},
|
|
{
|
|
name: "RAID6 rebuilding",
|
|
device: "/dev/md2",
|
|
output: `/dev/md2:
|
|
Version : 1.2
|
|
Creation Time : Wed Jan 14 08:00:00 2025
|
|
Raid Level : raid6
|
|
Array Size : 409600000 (390.62 GiB 419.43 GB)
|
|
Used Dev Size : 102400000 (97.66 GiB 104.86 GB)
|
|
Raid Devices : 6
|
|
Total Devices : 6
|
|
Persistence : Superblock is persistent
|
|
|
|
Update Time : Thu Jan 16 13:00:00 2025
|
|
State : active, recovering
|
|
Active Devices : 5
|
|
Working Devices : 6
|
|
Failed Devices : 0
|
|
Spare Devices : 1
|
|
|
|
Rebuild Status : 42% complete
|
|
|
|
Name : server:2
|
|
UUID : fedcba09:87654321:fedcba09:87654321
|
|
|
|
Number Major Minor RaidDevice State
|
|
0 8 1 0 active sync /dev/sda1
|
|
1 8 17 1 active sync /dev/sdb1
|
|
2 8 33 2 active sync /dev/sdc1
|
|
3 8 49 3 active sync /dev/sdd1
|
|
6 8 81 4 spare rebuilding /dev/sdf1
|
|
5 8 65 5 active sync /dev/sde1`,
|
|
want: host.RAIDArray{
|
|
Device: "/dev/md2",
|
|
Name: "server:2",
|
|
Level: "raid6",
|
|
State: "active, recovering",
|
|
TotalDevices: 6,
|
|
ActiveDevices: 5,
|
|
WorkingDevices: 6,
|
|
FailedDevices: 0,
|
|
SpareDevices: 1,
|
|
UUID: "fedcba09:87654321:fedcba09:87654321",
|
|
RebuildPercent: 42.0,
|
|
Devices: []host.RAIDDevice{
|
|
{Device: "/dev/sda1", State: "active sync", Slot: 0},
|
|
{Device: "/dev/sdb1", State: "active sync", Slot: 1},
|
|
{Device: "/dev/sdc1", State: "active sync", Slot: 2},
|
|
{Device: "/dev/sdd1", State: "active sync", Slot: 3},
|
|
{Device: "/dev/sdf1", State: "spare rebuilding", Slot: 6},
|
|
{Device: "/dev/sde1", State: "active sync", Slot: 5},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
got, err := parseDetail(tt.device, tt.output)
|
|
if (err != nil) != tt.wantErr {
|
|
t.Errorf("parseDetail() error = %v, wantErr %v", err, tt.wantErr)
|
|
return
|
|
}
|
|
|
|
// Compare fields
|
|
if got.Device != tt.want.Device {
|
|
t.Errorf("Device = %v, want %v", got.Device, tt.want.Device)
|
|
}
|
|
if got.Name != tt.want.Name {
|
|
t.Errorf("Name = %v, want %v", got.Name, tt.want.Name)
|
|
}
|
|
if got.Level != tt.want.Level {
|
|
t.Errorf("Level = %v, want %v", got.Level, tt.want.Level)
|
|
}
|
|
if got.State != tt.want.State {
|
|
t.Errorf("State = %v, want %v", got.State, tt.want.State)
|
|
}
|
|
if got.TotalDevices != tt.want.TotalDevices {
|
|
t.Errorf("TotalDevices = %v, want %v", got.TotalDevices, tt.want.TotalDevices)
|
|
}
|
|
if got.ActiveDevices != tt.want.ActiveDevices {
|
|
t.Errorf("ActiveDevices = %v, want %v", got.ActiveDevices, tt.want.ActiveDevices)
|
|
}
|
|
if got.WorkingDevices != tt.want.WorkingDevices {
|
|
t.Errorf("WorkingDevices = %v, want %v", got.WorkingDevices, tt.want.WorkingDevices)
|
|
}
|
|
if got.FailedDevices != tt.want.FailedDevices {
|
|
t.Errorf("FailedDevices = %v, want %v", got.FailedDevices, tt.want.FailedDevices)
|
|
}
|
|
if got.SpareDevices != tt.want.SpareDevices {
|
|
t.Errorf("SpareDevices = %v, want %v", got.SpareDevices, tt.want.SpareDevices)
|
|
}
|
|
if got.UUID != tt.want.UUID {
|
|
t.Errorf("UUID = %v, want %v", got.UUID, tt.want.UUID)
|
|
}
|
|
if got.RebuildPercent != tt.want.RebuildPercent {
|
|
t.Errorf("RebuildPercent = %v, want %v", got.RebuildPercent, tt.want.RebuildPercent)
|
|
}
|
|
|
|
// Compare devices
|
|
if len(got.Devices) != len(tt.want.Devices) {
|
|
t.Errorf("Devices count = %v, want %v", len(got.Devices), len(tt.want.Devices))
|
|
}
|
|
for i := range got.Devices {
|
|
if i >= len(tt.want.Devices) {
|
|
break
|
|
}
|
|
if got.Devices[i].Device != tt.want.Devices[i].Device {
|
|
t.Errorf("Device[%d].Device = %v, want %v", i, got.Devices[i].Device, tt.want.Devices[i].Device)
|
|
}
|
|
if got.Devices[i].State != tt.want.Devices[i].State {
|
|
t.Errorf("Device[%d].State = %v, want %v", i, got.Devices[i].State, tt.want.Devices[i].State)
|
|
}
|
|
if got.Devices[i].Slot != tt.want.Devices[i].Slot {
|
|
t.Errorf("Device[%d].Slot = %v, want %v", i, got.Devices[i].Slot, tt.want.Devices[i].Slot)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|