Pulse/internal/api/session_store.go
rcourtman 6ca4d9b750 Fix P1/P2 infrastructure issues: panic recovery and optimizations
This commit addresses 4 P1 important issues and 1 P2 optimization in infrastructure components:

**P1-1: Missing Panic Recovery in Discovery Service** (service.go:172-195, 499-542)
- **Problem**: No panic recovery in Start(), ForceRefresh(), SetSubnet() goroutines
- **Impact**: Silent service death if scan panics, broken discovery with no monitoring
- **Fix**:
  - Wrapped initial scan goroutine with defer/recover (lines 172-182)
  - Wrapped scanLoop goroutine with defer/recover (lines 185-195)
  - Wrapped ForceRefresh scan with defer/recover (lines 499-509)
  - Wrapped SetSubnet scan with defer/recover (lines 532-542)
  - All log panics with stack traces for debugging

**P1-2: Missing Panic Recovery in Config Watcher Callback** (watcher.go:546-556)
- **Problem**: User-provided onMockReload callback could panic and crash watcher
- **Impact**: Panicking callback kills watcher goroutine, no config updates
- **Fix**: Wrapped callback invocation with defer/recover and stack trace logging

**P1-3: Session Store Stop() Using Send Instead of Close** (session_store.go:16-84)
- **Problem**: Stop() used channel send which blocks if nobody reads
- **Impact**: Stop() hangs if backgroundWorker already exited
- **Fix**:
  - Added sync.Once field stopOnce (line 22)
  - Changed Stop() to use close() within stopOnce.Do() (lines 80-84)
  - Prevents double-close panic and ensures all readers are signaled

**P2-1: Backup Cleanup Inefficient O(n²) Sort** (persistence.go:1424-1427)
- **Problem**: Bubble sort used to sort backups by modification time
- **Impact**: Inefficient for large backup counts (>100 files)
- **Fix**:
  - Replaced bubble sort with sort.Slice() using O(n log n) algorithm
  - Added "sort" import (line 9)
  - Maintains same oldest-first ordering for deletion logic

All fixes add defensive programming without changing external behavior. Panic recovery ensures services continue operating even with bugs, while optimization reduces cleanup time for backup-heavy environments.
2025-11-07 09:55:22 +00:00

267 lines
6.2 KiB
Go

package api
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"os"
"path/filepath"
"sync"
"time"
"github.com/rs/zerolog/log"
)
// SessionStore handles persistent session storage
type SessionStore struct {
sessions map[string]*SessionData // keyed by sessionHash(token); guarded by mu
mu sync.RWMutex // protects sessions
dataPath string // directory where sessions.json is persisted
saveTicker *time.Ticker // drives periodic cleanup+save (5 min, see NewSessionStore)
stopChan chan bool // closed (not sent to) by Stop() to signal backgroundWorker
stopOnce sync.Once // Ensures Stop() can only close channel once
}
// sessionHash derives the map/storage key for a session token by
// hashing it with SHA-256 and hex-encoding the digest, so raw tokens
// never appear in memory maps or on disk.
func sessionHash(token string) string {
	digest := sha256.Sum256([]byte(token))
	return hex.EncodeToString(digest[:])
}
// sessionPersisted is the on-disk representation of one session in the
// current format: a JSON array of these entries, where Key holds
// sessionHash(token). Legacy files stored a map keyed by raw tokens
// instead (see load for the fallback path).
type sessionPersisted struct {
Key string `json:"key"`
ExpiresAt time.Time `json:"expires_at"`
CreatedAt time.Time `json:"created_at"`
UserAgent string `json:"user_agent,omitempty"`
IP string `json:"ip,omitempty"`
}
// SessionData represents a user session
type SessionData struct {
ExpiresAt time.Time `json:"expires_at"` // after this instant the session is invalid
CreatedAt time.Time `json:"created_at"`
UserAgent string `json:"user_agent,omitempty"` // recorded at creation for auditing
IP string `json:"ip,omitempty"` // recorded at creation for auditing
}
// NewSessionStore creates a new persistent session store rooted at
// dataPath, restores any sessions previously saved to disk, and starts
// the background worker that periodically expires and persists sessions.
func NewSessionStore(dataPath string) *SessionStore {
	s := &SessionStore{
		sessions: make(map[string]*SessionData),
		dataPath: dataPath,
		stopChan: make(chan bool),
	}

	// Restore whatever was persisted by a previous run.
	s.load()

	// Periodic cleanup + save every five minutes until Stop is called.
	s.saveTicker = time.NewTicker(5 * time.Minute)
	go s.backgroundWorker()

	return s
}
// backgroundWorker handles periodic saves and cleanup. It runs until
// stopChan is closed, at which point it flushes once more and exits.
func (s *SessionStore) backgroundWorker() {
	for {
		select {
		case <-s.stopChan:
			// Final flush on shutdown.
			s.save()
			return
		case <-s.saveTicker.C:
			s.cleanup()
			s.save()
		}
	}
}
// Stop gracefully stops the session store. It is safe to call more
// than once: the sync.Once guard makes repeat calls no-ops.
func (s *SessionStore) Stop() {
	s.stopOnce.Do(func() {
		s.saveTicker.Stop()
		// close (rather than send) releases every reader and cannot
		// block even if backgroundWorker has already exited.
		close(s.stopChan)
		// Flush synchronously so callers know state is on disk.
		s.save()
	})
}
// CreateSession creates a new session for token, valid for duration,
// recording the client's user agent and IP, and persists immediately.
func (s *SessionStore) CreateSession(token string, duration time.Duration, userAgent, ip string) {
	s.mu.Lock()
	defer s.mu.Unlock()

	now := time.Now()
	s.sessions[sessionHash(token)] = &SessionData{
		ExpiresAt: now.Add(duration),
		CreatedAt: now,
		UserAgent: userAgent,
		IP:        ip,
	}

	// Session creation must survive a crash, so persist right away.
	s.saveUnsafe()
}
// ValidateSession reports whether token corresponds to a stored,
// not-yet-expired session.
func (s *SessionStore) ValidateSession(token string) bool {
	s.mu.RLock()
	defer s.mu.RUnlock()

	data, ok := s.sessions[sessionHash(token)]
	return ok && time.Now().Before(data.ExpiresAt)
}
// ExtendSession pushes the session's expiry to now+duration and
// persists the change. Unknown tokens are ignored.
func (s *SessionStore) ExtendSession(token string, duration time.Duration) {
	s.mu.Lock()
	defer s.mu.Unlock()

	data, ok := s.sessions[sessionHash(token)]
	if !ok {
		return
	}
	data.ExpiresAt = time.Now().Add(duration)
	s.saveUnsafe()
}
// DeleteSession removes the session for token (if any) and persists.
func (s *SessionStore) DeleteSession(token string) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// delete on a missing key is a no-op, so no existence check needed.
	delete(s.sessions, sessionHash(token))
	s.saveUnsafe()
}
// GetSession returns session data if it exists and is valid, or nil.
//
// It returns a copy of the stored record rather than a pointer into
// the map: ExtendSession mutates the stored entry's ExpiresAt under
// the write lock, so handing out the live pointer would let callers
// read it unsynchronized — a data race.
func (s *SessionStore) GetSession(token string) *SessionData {
	s.mu.RLock()
	defer s.mu.RUnlock()
	session, exists := s.sessions[sessionHash(token)]
	if !exists || time.Now().After(session.ExpiresAt) {
		return nil
	}
	// Snapshot under the read lock; safe for the caller to keep.
	cp := *session
	return &cp
}
// cleanup removes expired sessions. It acquires the write lock itself,
// so callers must not already hold s.mu.
func (s *SessionStore) cleanup() {
	s.mu.Lock()
	defer s.mu.Unlock()
	now := time.Now()
	for key, session := range s.sessions {
		if !now.After(session.ExpiresAt) {
			continue
		}
		delete(s.sessions, key)
		// Keys normally come from sessionHash (64 hex chars), but new-format
		// files are loaded with entry.Key used verbatim, so a corrupt or
		// hand-edited file could hold a short key. Guard the slice so a bad
		// key cannot panic the background worker.
		display := key
		if len(display) > 8 {
			display = display[:8] + "..."
		}
		log.Debug().Str("sessionKey", display).Msg("Cleaned up expired session")
	}
}
// save persists sessions to disk.
//
// It takes the write lock, not the read lock: saveUnsafe writes the
// shared temp file "sessions.json.tmp", and two goroutines each holding
// the read lock could run saveUnsafe concurrently, interleaving their
// WriteFile/Rename calls and corrupting the persisted file. The write
// lock serializes all savers.
func (s *SessionStore) save() {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.saveUnsafe()
}
// saveUnsafe saves without locking (caller must hold lock). Failures
// are logged and swallowed: persistence is best-effort and must not
// take down the caller.
func (s *SessionStore) saveUnsafe() {
	target := filepath.Join(s.dataPath, "sessions.json")

	// Ensure the data directory exists (0700: session data is sensitive).
	if err := os.MkdirAll(s.dataPath, 0700); err != nil {
		log.Error().Err(err).Msg("Failed to create sessions directory")
		return
	}

	// Flatten the in-memory map into the on-disk slice format.
	entries := make([]sessionPersisted, 0, len(s.sessions))
	for key, sess := range s.sessions {
		entries = append(entries, sessionPersisted{
			Key:       key,
			ExpiresAt: sess.ExpiresAt,
			CreatedAt: sess.CreatedAt,
			UserAgent: sess.UserAgent,
			IP:        sess.IP,
		})
	}

	payload, err := json.Marshal(entries)
	if err != nil {
		log.Error().Err(err).Msg("Failed to marshal sessions")
		return
	}

	// Write to a temp file, then rename: readers never see a partial file.
	tmp := target + ".tmp"
	if err := os.WriteFile(tmp, payload, 0600); err != nil {
		log.Error().Err(err).Msg("Failed to write sessions file")
		return
	}
	if err := os.Rename(tmp, target); err != nil {
		log.Error().Err(err).Msg("Failed to rename sessions file")
		return
	}

	log.Debug().Int("count", len(s.sessions)).Msg("Sessions saved to disk")
}
// load reads sessions from disk, skipping entries that are already
// expired. It first tries the current hashed-key slice format, then
// falls back to the legacy map format (raw tokens as keys), hashing
// those keys on the way in. A missing file is not an error.
func (s *SessionStore) load() {
	path := filepath.Join(s.dataPath, "sessions.json")
	raw, err := os.ReadFile(path)
	if err != nil {
		// No file yet simply means no prior sessions.
		if !os.IsNotExist(err) {
			log.Error().Err(err).Msg("Failed to read sessions file")
		}
		return
	}

	now := time.Now()
	s.sessions = make(map[string]*SessionData)

	// Current format: a JSON array of sessionPersisted entries.
	var entries []sessionPersisted
	if err := json.Unmarshal(raw, &entries); err == nil {
		for _, e := range entries {
			if now.After(e.ExpiresAt) {
				continue // drop already-expired entries at load time
			}
			s.sessions[e.Key] = &SessionData{
				ExpiresAt: e.ExpiresAt,
				CreatedAt: e.CreatedAt,
				UserAgent: e.UserAgent,
				IP:        e.IP,
			}
		}
		log.Info().Int("loaded", len(s.sessions)).Int("total", len(entries)).Msg("Sessions loaded from disk (hashed format)")
		return
	}

	// Legacy map format fallback (keys stored as raw tokens)
	var legacy map[string]*SessionData
	if err := json.Unmarshal(raw, &legacy); err != nil {
		log.Error().Err(err).Msg("Failed to unmarshal legacy sessions")
		return
	}
	migrated := 0
	for token, sess := range legacy {
		if now.After(sess.ExpiresAt) {
			continue
		}
		// Hash the raw token so the in-memory map uses the new key form.
		s.sessions[sessionHash(token)] = sess
		migrated++
	}
	log.Info().
		Int("loaded", migrated).
		Int("total", len(legacy)).
		Msg("Sessions loaded from disk (legacy format migrated)")
}