Harden alert history manager against concurrent access

This commit is contained in:
rcourtman 2025-10-03 12:45:04 +00:00
parent 9c711af553
commit 75b3a4fec4
2 changed files with 98 additions and 6 deletions

View file

@ -79,7 +79,7 @@ func (hm *HistoryManager) AddAlert(alert Alert) {
defer hm.mu.Unlock()
entry := HistoryEntry{
Alert: alert,
Alert: *alert.Clone(),
Timestamp: time.Now(),
}
@ -170,26 +170,32 @@ func (hm *HistoryManager) loadHistory() error {
// saveHistory saves history to disk
func (hm *HistoryManager) saveHistory() error {
hm.mu.RLock()
data, err := json.MarshalIndent(hm.history, "", " ")
snapshot := make([]HistoryEntry, len(hm.history))
copy(snapshot, hm.history)
hm.mu.RUnlock()
data, err := json.MarshalIndent(snapshot, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal history: %w", err)
}
// Create backup of existing file
if _, err := os.Stat(hm.historyFile); err == nil {
if err := os.Rename(hm.historyFile, hm.backupFile); err != nil {
historyFile := hm.historyFile
backupFile := hm.backupFile
if _, err := os.Stat(historyFile); err == nil {
if err := os.Rename(historyFile, backupFile); err != nil {
log.Warn().Err(err).Msg("Failed to create backup file")
}
}
// Write new file
if err := os.WriteFile(hm.historyFile, data, 0644); err != nil {
if err := os.WriteFile(historyFile, data, 0644); err != nil {
return fmt.Errorf("failed to write history file: %w", err)
}
log.Debug().Int("entries", len(hm.history)).Msg("Saved alert history")
log.Debug().Int("entries", len(snapshot)).Msg("Saved alert history")
return nil
}

View file

@ -0,0 +1,86 @@
package alerts
import (
"fmt"
"os"
"path/filepath"
"sync"
"testing"
"time"
"github.com/rs/zerolog"
)
func TestHistoryManagerConcurrentAccess(t *testing.T) {
origLevel := zerolog.GlobalLevel()
zerolog.SetGlobalLevel(zerolog.Disabled)
defer zerolog.SetGlobalLevel(origLevel)
tempDir := t.TempDir()
hm := &HistoryManager{
dataDir: tempDir,
historyFile: filepath.Join(tempDir, HistoryFileName),
backupFile: filepath.Join(tempDir, HistoryBackupFileName),
history: make([]HistoryEntry, 0),
saveInterval: 50 * time.Millisecond,
stopChan: make(chan struct{}),
}
// Start periodic save and cleanup routines
hm.startPeriodicSave()
go hm.cleanupRoutine()
const iterations = 500
var wg sync.WaitGroup
wg.Add(4)
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
hm.AddAlert(Alert{
ID: fmt.Sprintf("alert-%d", i),
Type: "test",
Level: AlertLevelWarning,
Message: "test alert",
StartTime: time.Now(),
})
}
}()
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
hm.GetHistory(time.Now().Add(-1*time.Hour), 100)
hm.GetAllHistory(100)
}
}()
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
hm.RemoveAlert(fmt.Sprintf("alert-%d", i))
time.Sleep(1 * time.Millisecond)
}
}()
go func() {
defer wg.Done()
for i := 0; i < iterations/10; i++ {
_ = hm.ClearAllHistory()
time.Sleep(5 * time.Millisecond)
}
}()
wg.Wait()
close(hm.stopChan)
if hm.saveTicker != nil {
hm.saveTicker.Stop()
}
// give final save a chance
time.Sleep(20 * time.Millisecond)
_ = os.RemoveAll(tempDir)
}