Mirror of https://github.com/rcourtman/Pulse.git (synced 2026-04-28 11:30:15 +00:00)
Proxmox status.Mem includes page cache as "used" memory, inflating reported VM usage. The existing fallbacks (balloon meminfo, RRD, linked host agent) were frequently unavailable, causing most VMs to fall through to the inflated status-mem source. Adds a new last-resort fallback that reads /proc/meminfo via the QEMU guest agent file-read endpoint to get accurate MemAvailable. Results are cached (60s positive, 5min negative backoff for unsupported VMs). Also fixes: RRD memavailable fallback missing from traditional polling path, cache key collisions in multi-PVE setups, FreeMem underflow guard inconsistency, and integer overflow in kB-to-bytes conversion.
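For context, the core step of the new fallback is simply parsing MemAvailable out of the /proc/meminfo text returned by the guest agent's file-read endpoint and converting the kB value to bytes without overflowing. Below is a minimal, self-contained sketch of that step only; the function name, error messages, and sample values are illustrative and are not Pulse's actual code or API.

// Sketch only: parse MemAvailable (reported in kB) from /proc/meminfo text as
// returned by the QEMU guest agent file-read endpoint, converting to bytes
// with an explicit overflow guard. Names are illustrative, not Pulse's API.
package main

import (
	"bufio"
	"fmt"
	"math"
	"strconv"
	"strings"
)

func parseMemAvailableBytes(meminfo string) (uint64, error) {
	scanner := bufio.NewScanner(strings.NewReader(meminfo))
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text()) // e.g. ["MemAvailable:", "16256004", "kB"]
		if len(fields) < 2 || fields[0] != "MemAvailable:" {
			continue
		}
		kb, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			return 0, fmt.Errorf("parse MemAvailable: %w", err)
		}
		if kb > math.MaxUint64/1024 { // guard the kB-to-bytes conversion
			return 0, fmt.Errorf("MemAvailable %d kB would overflow uint64 bytes", kb)
		}
		return kb * 1024, nil
	}
	return 0, fmt.Errorf("MemAvailable not found (kernel too old or truncated read)")
}

func main() {
	sample := "MemTotal: 32768000 kB\nMemAvailable: 16256004 kB\n"
	b, err := parseMemAvailableBytes(sample)
	fmt.Println(b, err) // 16646148096 <nil>
}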
1014 lines · 30 KiB · Go
package monitoring

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
	"time"

	"github.com/rcourtman/pulse-go-rewrite/internal/ai/memory"
	"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
	"github.com/rcourtman/pulse-go-rewrite/internal/config"
	"github.com/rcourtman/pulse-go-rewrite/internal/models"
	"github.com/rcourtman/pulse-go-rewrite/internal/notifications"
	"github.com/rcourtman/pulse-go-rewrite/internal/resources"
	"github.com/rcourtman/pulse-go-rewrite/pkg/pbs"
	"github.com/rcourtman/pulse-go-rewrite/pkg/proxmox"
)

// Minimal mock PVE client for interface satisfaction
type mockPVEClient struct {
	PVEClientInterface
}

func (m *mockPVEClient) GetNodes(ctx context.Context) ([]proxmox.Node, error) { return nil, nil }

func (m *mockPVEClient) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
	return 0, fmt.Errorf("not implemented")
}

func TestMonitor_GetConnectionStatuses(t *testing.T) {
	// Real mode
	m := &Monitor{
		config: &config.Config{
			PVEInstances: []config.PVEInstance{{Name: "pve1"}, {Name: "pve2"}},
			PBSInstances: []config.PBSInstance{{Name: "pbs1"}, {Name: "pbs2"}},
		},
		state:      models.NewState(),
		pveClients: make(map[string]PVEClientInterface),
		pbsClients: make(map[string]*pbs.Client),
	}

	// Set connection health in state
	m.state.SetConnectionHealth("pve1", true)
	m.state.SetConnectionHealth("pbs-pbs1", true)

	// Populate clients for "connected" instances
	m.pveClients["pve1"] = &mockPVEClient{}
	m.pbsClients["pbs1"] = &pbs.Client{}

	// Mock mode is left untouched: GetConnectionStatuses only checks mock.IsMockEnabled(),
	// which is assumed to be false by default. Calling Monitor.SetMockMode on this partially
	// initialized Monitor (no alertManager/metricsHistory) could panic.

	statuses := m.GetConnectionStatuses()

	if !statuses["pve-pve1"] {
		t.Error("pve1 should be connected")
	}
	if statuses["pve-pve2"] {
		t.Error("pve2 should be disconnected")
	}
	if !statuses["pbs-pbs1"] {
		t.Error("pbs1 should be connected")
	}
	if statuses["pbs-pbs2"] {
		t.Error("pbs2 should be disconnected")
	}
}

func TestMonitor_Stop(t *testing.T) {
	// Initialize a monitor with mostly nil dependencies, but enough to pass Stop().
	// This ensures Stop is safe to call even if initialization was partial.
	m := &Monitor{
		config: &config.Config{},
		state:  models.NewState(),
	}

	// Should not panic
	m.Stop()
}

func TestPollPBSInstance(t *testing.T) {
	// Create a mock PBS server
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/api2/json/nodes/localhost/status":
			json.NewEncoder(w).Encode(map[string]interface{}{
				"data": map[string]interface{}{
					"cpu": 0.1,
					"memory": map[string]interface{}{
						"used":  1024,
						"total": 2048,
					},
					"uptime": 100,
				},
			})
		case "/api2/json/admin/datastore":
			json.NewEncoder(w).Encode(map[string]interface{}{
				"data": []map[string]interface{}{
					{"store": "store1", "total": 1000, "used": 100},
				},
			})
		default:
			if strings.Contains(r.URL.Path, "version") {
				json.NewEncoder(w).Encode(map[string]interface{}{
					"data": map[string]interface{}{
						"version": "3.0",
						"release": "1",
					},
				})
				return
			}
			http.Error(w, "not found", http.StatusNotFound)
		}
	}))
	defer server.Close()

	// Initialize PBS client
	client, err := pbs.NewClient(pbs.ClientConfig{
		Host:       server.URL,
		TokenName:  "root@pam!token",
		TokenValue: "secret",
		Timeout:    1 * time.Second,
	})
	if err != nil {
		t.Fatalf("Failed to create client: %v", err)
	}

	// Initialize Monitor
	m := &Monitor{
		config: &config.Config{
			PBSInstances: []config.PBSInstance{
				{
					Name:              "pbs-test",
					Host:              server.URL,
					MonitorDatastores: true,
				},
			},
		},
		state:                   models.NewState(),
		stalenessTracker:        NewStalenessTracker(nil), // pass nil or mock PollMetrics
		nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
	}

	// Execute polling
	ctx := context.Background()
	m.pollPBSInstance(ctx, "pbs-test", client)

	// Verify state.
	// Accessing state directly without a lock since we are the only goroutine here.
	found := false
	for _, instance := range m.state.PBSInstances {
		if instance.Name == "pbs-test" {
			found = true
			if instance.Status != "online" {
				t.Errorf("Expected status online, got %s", instance.Status)
			}
			if len(instance.Datastores) != 1 {
				t.Errorf("Expected 1 datastore, got %d", len(instance.Datastores))
			}
			break
		}
	}
	if !found {
		t.Error("PBS instance not found in state")
	}
}

func TestPollPBSBackups(t *testing.T) {
	// Mock PBS server
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if strings.Contains(r.URL.Path, "/groups") {
			// groups response
			json.NewEncoder(w).Encode(map[string]interface{}{
				"data": []map[string]interface{}{
					{"backup-type": "vm", "backup-id": "100", "owner": "root@pam", "backup-count": 1},
				},
			})
			return
		}
		if strings.Contains(r.URL.Path, "/snapshots") {
			// snapshots response
			json.NewEncoder(w).Encode(map[string]interface{}{
				"data": []map[string]interface{}{
					{"backup-type": "vm", "backup-id": "100", "backup-time": 1600000000, "fingerprint": "fp1", "owner": "root@pam"},
				},
			})
			return
		}
		w.WriteHeader(http.StatusNotFound)
	}))
	defer server.Close()

	// Setup client
	client, err := pbs.NewClient(pbs.ClientConfig{
		Host:       server.URL,
		TokenName:  "root@pam!token",
		TokenValue: "secret",
	})
	if err != nil {
		t.Fatal(err)
	}

	// Setup monitor. NewState() already initializes the PBS backups map.
	m := &Monitor{
		config: &config.Config{
			PBSInstances: []config.PBSInstance{
				{Name: "pbs1", Host: server.URL},
			},
		},
		state:                   models.NewState(),
		nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
	}

	// Define datastores
	datastores := []models.PBSDatastore{
		{Name: "store1", Namespaces: []models.PBSNamespace{{Path: ""}}},
	}

	// Execute
	m.pollPBSBackups(context.Background(), "pbs1", client, datastores)

	// Verify
	found := false
	for _, b := range m.state.PBSBackups {
		if b.Instance == "pbs1" && b.Datastore == "store1" && b.BackupType == "vm" && b.VMID == "100" {
			found = true
			if b.Owner != "root@pam" {
				t.Errorf("Expected owner root@pam, got %s", b.Owner)
			}
		}
	}
	if !found {
		t.Error("PBS backup not found in state")
	}
}

func TestMonitor_GettersAndSetters(t *testing.T) {
	m := &Monitor{
		config:                  &config.Config{},
		state:                   models.NewState(),
		startTime:               time.Now(),
		nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
	}

	// Temperature monitoring (just ensuring these execute without panicking)
	m.EnableTemperatureMonitoring()
	m.DisableTemperatureMonitoring()

	// GetStartTime
	if m.GetStartTime().IsZero() {
		t.Error("GetStartTime returned zero time")
	}

	// GetState returns a struct copy, not a pointer; just verify it is accessible.
	state := m.GetState()
	_ = len(state.Nodes)

	// SetMockMode requires alertManager and metricsHistory, so it is skipped here to avoid a panic.

	// GetDiscoveryService
	if m.GetDiscoveryService() != nil {
		t.Error("GetDiscoveryService expected nil initially")
	}

	// Set/Get ResourceStore
	if m.resourceStore != nil {
		t.Error("resourceStore should be nil")
	}
	var rs ResourceStoreInterface // nil interface
	m.SetResourceStore(rs)

	// Other getters
	if m.GetAlertManager() != nil {
		t.Error("expected nil")
	}
	if m.GetIncidentStore() != nil {
		t.Error("expected nil")
	}
	if m.GetNotificationManager() != nil {
		t.Error("expected nil")
	}
	if m.GetConfigPersistence() != nil {
		t.Error("expected nil")
	}
	if m.GetMetricsStore() != nil {
		t.Error("expected nil")
	}
	if m.GetMetricsHistory() != nil {
		t.Error("expected nil")
	}
}

func TestMonitor_DiscoveryService(t *testing.T) {
	m := &Monitor{
		config:                  &config.Config{},
		nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
	}

	// StartDiscoveryService creates a new service if none exists.
	m.StartDiscoveryService(context.Background(), nil, "auto")
	if m.discoveryService == nil {
		t.Error("StartDiscoveryService failed to create service")
	}

	// GetDiscoveryService
	if m.GetDiscoveryService() != m.discoveryService {
		t.Error("GetDiscoveryService returned incorrect service")
	}

	// StopDiscoveryService
	m.StopDiscoveryService()
}

type mockPollExecutor struct {
	executed chan PollTask
}

func (e *mockPollExecutor) Execute(ctx context.Context, task PollTask) {
	if e.executed != nil {
		e.executed <- task
	}
}

func TestMonitor_TaskWorker(t *testing.T) {
	queue := NewTaskQueue()
	execChan := make(chan PollTask, 1)

	m := &Monitor{
		taskQueue:               queue,
		executor:                &mockPollExecutor{executed: execChan},
		pbsClients:              map[string]*pbs.Client{"test-instance": {}}, // dummy client; a struct pointer is enough for the check
		nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
		// scheduler is nil, so the worker falls back to simple rescheduling
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Add an overdue task
	queue.Upsert(ScheduledTask{
		InstanceName: "test-instance",
		InstanceType: InstanceTypePBS,
		NextRun:      time.Now().Add(-1 * time.Minute), // overdue
		Interval:     time.Minute,
	})

	// Run the worker; startTaskWorkers launches the worker goroutine.
	m.startTaskWorkers(ctx, 1)

	// Wait for execution
	select {
	case task := <-execChan:
		if task.InstanceName != "test-instance" {
			t.Errorf("Executed wrong task: %s", task.InstanceName)
		}
	case <-time.After(2 * time.Second):
		t.Fatal("Task execution timed out")
	}

	// Verify rescheduling occurred: the task should be back in the queue with a future run time.
	// Rescheduling happens after Execute returns, so allow a brief delay.
	time.Sleep(100 * time.Millisecond)

	if queue.Size() != 1 {
		t.Errorf("Task was not rescheduled, queue size: %d", queue.Size())
	}
}

func TestMonitor_AlertCallbacks(t *testing.T) {
	// SetAlertTriggeredAICallback delegates to the alert manager, but it checks for a nil
	// alertManager and returns early. With no alertManager configured, the callback is never
	// set, so this only exercises the early-return path.
	m := &Monitor{}
	m.SetAlertTriggeredAICallback(func(alert *alerts.Alert) {})

	// Exercise the firing logic by calling the handlers directly.
	// handleAlertFired checks for nil, then logs/broadcasts; not panicking is the pass condition.
	alert := &alerts.Alert{ID: "test-alert"}
	m.handleAlertFired(alert)

	m.handleAlertResolved("test-alert")
	m.handleAlertAcknowledged(alert, "user")
	m.handleAlertUnacknowledged(alert, "user")
}

type mockResourceStore struct{}

func (m *mockResourceStore) ShouldSkipAPIPolling(hostname string) bool {
	return hostname == "ignored-node"
}
func (m *mockResourceStore) GetPollingRecommendations() map[string]float64 { return nil }
func (m *mockResourceStore) GetAll() []resources.Resource                  { return nil }
func (m *mockResourceStore) PopulateFromSnapshot(snapshot models.StateSnapshot) {}

func TestMonitor_ShouldSkipNodeMetrics(t *testing.T) {
	m := &Monitor{
		resourceStore: &mockResourceStore{},
	}

	if !m.shouldSkipNodeMetrics("ignored-node") {
		t.Error("Should skip ignored-node")
	}
	if m.shouldSkipNodeMetrics("other-node") {
		t.Error("Should not skip other-node")
	}
}

func TestMonitor_ResourceUpdate(t *testing.T) {
	mockStore := &mockResourceStore{}
	m := &Monitor{
		resourceStore: mockStore,
	}

	// updateResourceStore calls PopulateFromSnapshot (a no-op in the mock, but covered)
	m.updateResourceStore(models.StateSnapshot{})

	// getResourcesForBroadcast
	res := m.getResourcesForBroadcast()
	if res != nil {
		t.Error("Expected nil resources from mock")
	}
}

func TestMonitor_DockerHostManagement(t *testing.T) {
	m := &Monitor{
		state:                   models.NewState(),
		removedDockerHosts:      make(map[string]time.Time),
		dockerTokenBindings:     make(map[string]string),
		dockerCommands:          make(map[string]*dockerHostCommand),
		dockerCommandIndex:      make(map[string]string),
		nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
	}

	// Initialize config
	m.config = &config.Config{}

	// Initialize DockerMetadataStore with a temp dir
	m.dockerMetadataStore = config.NewDockerMetadataStore(t.TempDir(), nil)

	// Add a docker host to state
	host := models.DockerHost{
		ID:       "docker1",
		Hostname: "docker-host-1",
	}
	m.state.UpsertDockerHost(host)

	// Test SetDockerHostCustomDisplayName
	_, err := m.SetDockerHostCustomDisplayName("docker1", "My Docker Host")
	if err != nil {
		t.Errorf("SetDockerHostCustomDisplayName failed: %v", err)
	}
	// Verify
	hosts := m.state.GetDockerHosts()
	if len(hosts) != 1 || hosts[0].CustomDisplayName != "My Docker Host" {
		t.Errorf("CustomDisplayName mismatch: got %v", hosts[0].CustomDisplayName)
	}

	// Test HideDockerHost
	_, err = m.HideDockerHost("docker1")
	if err != nil {
		t.Errorf("HideDockerHost failed: %v", err)
	}
	hosts = m.state.GetDockerHosts()
	if len(hosts) != 1 || !hosts[0].Hidden {
		t.Error("Host should be hidden")
	}

	// Test UnhideDockerHost
	_, err = m.UnhideDockerHost("docker1")
	if err != nil {
		t.Errorf("UnhideDockerHost failed: %v", err)
	}
	hosts = m.state.GetDockerHosts()
	if len(hosts) != 1 || hosts[0].Hidden {
		t.Error("Host should be unhidden")
	}

	// Test RemoveDockerHost
	removedHost, err := m.RemoveDockerHost("docker1")
	if err != nil {
		t.Errorf("RemoveDockerHost failed: %v", err)
	}
	if removedHost.ID != "docker1" {
		t.Errorf("Expected removed host ID docker1, got %s", removedHost.ID)
	}
	hosts = m.state.GetDockerHosts()
	if len(hosts) != 0 {
		t.Error("Host should be removed")
	}

	// Test RemoveDockerHost with a non-existent host
	_, err = m.RemoveDockerHost("docker2")
	if err != nil {
		t.Errorf("RemoveDockerHost for non-existent host failed: %v", err)
	}
}

func TestMonitor_HostAgentManagement(t *testing.T) {
	m := &Monitor{
		state: models.NewState(),
	}

	// Initialize HostMetadataStore
	m.hostMetadataStore = config.NewHostMetadataStore(t.TempDir(), nil)

	// Add a host linked to a node
	host := models.Host{
		ID:           "host1",
		Hostname:     "node1",
		LinkedNodeID: "node1",
	}
	m.state.UpsertHost(host)
	m.nodePendingUpdatesCache = make(map[string]pendingUpdatesCache)

	// Test UnlinkHostAgent
	err := m.UnlinkHostAgent("host1")
	if err != nil {
		t.Errorf("UnlinkHostAgent failed: %v", err)
	}
	// Verify
	hosts := m.state.GetHosts()
	if len(hosts) != 1 || hosts[0].LinkedNodeID != "" {
		t.Errorf("LinkedNodeID should be empty, got %q", hosts[0].LinkedNodeID)
	}

	// Test UpdateHostAgentConfig
	enabled := true
	err = m.UpdateHostAgentConfig("host1", &enabled)
	if err != nil {
		t.Errorf("UpdateHostAgentConfig failed: %v", err)
	}

	// Verify in state
	hosts = m.state.GetHosts()
	if len(hosts) != 1 || !hosts[0].CommandsEnabled {
		t.Error("CommandsEnabled should be true")
	}

	// Test UpdateHostAgentConfig with a non-existent host (should handle gracefully, creating metadata)
	err = m.UpdateHostAgentConfig("host2", &enabled)
	if err != nil {
		t.Errorf("UpdateHostAgentConfig for new host failed: %v", err)
	}
}

// Robust mock PVE client
type mockPVEClientExtended struct {
	mockPVEClient // embed basic mock
	nodes         []proxmox.Node
	resources     []proxmox.ClusterResource
}

func (m *mockPVEClientExtended) GetNodes(ctx context.Context) ([]proxmox.Node, error) {
	if m.nodes == nil {
		return []proxmox.Node{}, nil
	}
	return m.nodes, nil
}

func (m *mockPVEClientExtended) GetClusterResources(ctx context.Context, resourceType string) ([]proxmox.ClusterResource, error) {
	if m.resources == nil {
		return []proxmox.ClusterResource{}, nil
	}
	return m.resources, nil
}

func (m *mockPVEClientExtended) GetVMStatus(ctx context.Context, node string, vmid int) (*proxmox.VMStatus, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetNodeStatus(ctx context.Context, node string) (*proxmox.NodeStatus, error) {
	return &proxmox.NodeStatus{
		Memory: &proxmox.MemoryStatus{
			Total: 1000,
			Used:  500,
			Free:  500,
		},
		CPU:    0.5,
		Uptime: 10000,
	}, nil
}

func (m *mockPVEClientExtended) GetNodeRRDData(ctx context.Context, node string, timeframe string, cf string, ds []string) ([]proxmox.NodeRRDPoint, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetLXCRRDData(ctx context.Context, node string, vmid int, timeframe string, cf string, ds []string) ([]proxmox.GuestRRDPoint, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetVMRRDData(ctx context.Context, node string, vmid int, timeframe string, cf string, ds []string) ([]proxmox.GuestRRDPoint, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetVMs(ctx context.Context, node string) ([]proxmox.VM, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetContainers(ctx context.Context, node string) ([]proxmox.Container, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetStorage(ctx context.Context, node string) ([]proxmox.Storage, error) {
	return []proxmox.Storage{}, nil
}

func (m *mockPVEClientExtended) GetAllStorage(ctx context.Context) ([]proxmox.Storage, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetDisks(ctx context.Context, node string) ([]proxmox.Disk, error) {
	return []proxmox.Disk{}, nil
}

func (m *mockPVEClientExtended) GetStorageContent(ctx context.Context, node, storage string) ([]proxmox.StorageContent, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetVMSnapshots(ctx context.Context, node string, vmid int) ([]proxmox.Snapshot, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetContainerSnapshots(ctx context.Context, node string, vmid int) ([]proxmox.Snapshot, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetZFSPoolsWithDetails(ctx context.Context, node string) ([]proxmox.ZFSPoolInfo, error) {
	return []proxmox.ZFSPoolInfo{}, nil
}

func (m *mockPVEClientExtended) GetCephStatus(ctx context.Context) (*proxmox.CephStatus, error) {
	return nil, fmt.Errorf("ceph not enabled")
}

func (m *mockPVEClientExtended) GetCephDF(ctx context.Context) (*proxmox.CephDF, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetContainerStatus(ctx context.Context, node string, vmid int) (*proxmox.Container, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetContainerConfig(ctx context.Context, node string, vmid int) (map[string]interface{}, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetContainerInterfaces(ctx context.Context, node string, vmid int) ([]proxmox.ContainerInterface, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) IsClusterMember(ctx context.Context) (bool, error) {
	return false, nil
}

func (m *mockPVEClientExtended) GetVMFSInfo(ctx context.Context, node string, vmid int) ([]proxmox.VMFileSystem, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetVMNetworkInterfaces(ctx context.Context, node string, vmid int) ([]proxmox.VMNetworkInterface, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetVMAgentInfo(ctx context.Context, node string, vmid int) (map[string]interface{}, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetVMAgentVersion(ctx context.Context, node string, vmid int) (string, error) {
	return "", nil
}

func (m *mockPVEClientExtended) GetVMMemAvailableFromAgent(ctx context.Context, node string, vmid int) (uint64, error) {
	return 0, fmt.Errorf("not implemented")
}

func (m *mockPVEClientExtended) GetZFSPoolStatus(ctx context.Context, node string) ([]proxmox.ZFSPoolStatus, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetNodePendingUpdates(ctx context.Context, node string) ([]proxmox.AptPackage, error) {
	return nil, nil
}

func (m *mockPVEClientExtended) GetBackupTasks(ctx context.Context) ([]proxmox.Task, error) {
	return []proxmox.Task{
		{UPID: "UPID:node1:00001D1A:00000000:65E1E1E1:vzdump:101:root@pam:", Node: "node1", Status: "OK", StartTime: time.Now().Unix(), ID: "101"},
	}, nil
}

func (m *mockPVEClientExtended) GetReplicationStatus(ctx context.Context) ([]proxmox.ReplicationJob, error) {
	return []proxmox.ReplicationJob{
		{ID: "101-0", Guest: "101", Target: "node2", LastSyncUnix: time.Now().Unix(), DurationSeconds: 10},
	}, nil
}

func TestMonitor_PollBackupAndReplication(t *testing.T) {
	m := &Monitor{
		state:                   models.NewState(),
		nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
	}

	client := &mockPVEClientExtended{}
	m.pollBackupTasks(context.Background(), "pve-test", client)

	state := m.state.GetSnapshot()
	if len(state.PVEBackups.BackupTasks) != 1 {
		t.Errorf("Expected 1 backup task, got %d", len(state.PVEBackups.BackupTasks))
	}

	m.pollReplicationStatus(context.Background(), "pve-test", client, []models.VM{{VMID: 101, Name: "vm1"}})
	state = m.state.GetSnapshot()
	if len(state.ReplicationJobs) != 1 {
		t.Errorf("Expected 1 replication job, got %d", len(state.ReplicationJobs))
	}
}

func TestMonitor_GetState(t *testing.T) {
	m := &Monitor{
		state: models.NewState(),
	}
	s := m.GetState()
	if s.Nodes == nil {
		t.Error("Expected non-nil nodes in state")
	}
}

func TestPollPVEInstance(t *testing.T) {
	// Setup Monitor
	m := &Monitor{
		config: &config.Config{
			PVEInstances: []config.PVEInstance{
				{Name: "pve-test", Host: "https://localhost:8006"},
			},
		},
		state: models.NewState(),
		pveClients: make(map[string]PVEClientInterface),
		nodeLastOnline: make(map[string]time.Time),
		nodeSnapshots: make(map[string]NodeMemorySnapshot),
		guestSnapshots: make(map[string]GuestMemorySnapshot),
		nodeRRDMemCache: make(map[string]rrdMemCacheEntry),
		metricsHistory: NewMetricsHistory(32, time.Hour),
		guestMetadataCache: make(map[string]guestMetadataCacheEntry),
		guestMetadataLimiter: make(map[string]time.Time),
		lastClusterCheck: make(map[string]time.Time),
		lastPhysicalDiskPoll: make(map[string]time.Time),
		lastPVEBackupPoll: make(map[string]time.Time),
		lastPBSBackupPoll: make(map[string]time.Time),
		authFailures: make(map[string]int),
		lastAuthAttempt: make(map[string]time.Time),
		pollStatusMap: make(map[string]*pollStatus),
		nodePendingUpdatesCache: make(map[string]pendingUpdatesCache),
		instanceInfoCache: make(map[string]*instanceInfo),
		lastOutcome: make(map[string]taskOutcome),
		failureCounts: make(map[string]int),
		removedDockerHosts: make(map[string]time.Time),
		dockerTokenBindings: make(map[string]string),
		dockerCommands: make(map[string]*dockerHostCommand),
		dockerCommandIndex: make(map[string]string),
		guestAgentFSInfoTimeout: defaultGuestAgentFSInfoTimeout,
		guestAgentNetworkTimeout: defaultGuestAgentNetworkTimeout,
		guestAgentOSInfoTimeout: defaultGuestAgentOSInfoTimeout,
		guestAgentVersionTimeout: defaultGuestAgentVersionTimeout,
		guestAgentRetries: defaultGuestAgentRetries,
		// alertManager and notificationMgr are initialized since polling may use them
		alertManager: alerts.NewManager(),
		notificationMgr: notifications.NewNotificationManager(""), // or a mock
	}
	defer m.alertManager.Stop()
	defer m.notificationMgr.Stop()

	// Setup mock client
	mockClient := &mockPVEClientExtended{
		nodes: []proxmox.Node{
			{Node: "node1", Status: "online"},
		},
		resources: []proxmox.ClusterResource{
			{
				Type:   "qemu",
				VMID:   100,
				Name:   "vm100",
				Status: "running",
				Node:   "node1",
			},
		},
	}

	// Execute poll
	t.Log("Starting pollPVEInstance")
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	m.pollPVEInstance(ctx, "pve-test", mockClient)
	t.Log("Finished pollPVEInstance")

	// Verify state updates
	foundNode := false
	for _, n := range m.state.Nodes {
		if n.Name == "node1" && n.Instance == "pve-test" {
			foundNode = true
			break
		}
	}
	if !foundNode {
		t.Error("Node node1 not found in state after polling")
	}

	// Note: pollPVEInstance only polls nodes; VMs and containers are populated by
	// pollVMsAndContainersEfficient, so this test deliberately checks node state only.
}

func TestMonitor_MetricsGetters(t *testing.T) {
	m := &Monitor{
		metricsHistory: NewMetricsHistory(100, time.Hour),
		alertManager:   alerts.NewManager(),
		incidentStore:  &memory.IncidentStore{},
	}
	defer m.alertManager.Stop()

	now := time.Now()
	m.metricsHistory.AddGuestMetric("guest1", "cpu", 50.0, now)
	m.metricsHistory.AddNodeMetric("node1", "memory", 60.0, now)
	m.metricsHistory.AddStorageMetric("storage1", "usage", 70.0, now)

	guestMetrics := m.GetGuestMetrics("guest1", time.Hour)
	if len(guestMetrics["cpu"]) != 1 || guestMetrics["cpu"][0].Value != 50.0 {
		t.Errorf("Expected guest1 cpu metric, got %v", guestMetrics)
	}

	nodeMetrics := m.GetNodeMetrics("node1", "memory", time.Hour)
	if len(nodeMetrics) != 1 || nodeMetrics[0].Value != 60.0 {
		t.Errorf("Expected node1 memory metric, got %v", nodeMetrics)
	}

	storageMetrics := m.GetStorageMetrics("storage1", time.Hour)
	if len(storageMetrics["usage"]) != 1 || storageMetrics["usage"][0].Value != 70.0 {
		t.Errorf("Expected storage1 usage metric, got %v", storageMetrics)
	}

	if m.GetAlertManager() != m.alertManager {
		t.Error("GetAlertManager mismatch")
	}

	if m.GetIncidentStore() != m.incidentStore {
		t.Error("GetIncidentStore mismatch")
	}
}

func TestMonitor_AuthFailures(t *testing.T) {
	m := &Monitor{
		config: &config.Config{
			PVEInstances: []config.PVEInstance{
				{Name: "pve-fail", Host: "https://pve-fail:8006"},
			},
		},
		state:           models.NewState(),
		authFailures:    make(map[string]int),
		lastAuthAttempt: make(map[string]time.Time),
	}

	// Record a few failures
	m.recordAuthFailure("pve-fail", "pve")
	m.recordAuthFailure("pve-fail", "pve")

	m.mu.Lock()
	if m.authFailures["pve-pve-fail"] != 2 {
		t.Errorf("Expected 2 failures, got %d", m.authFailures["pve-pve-fail"])
	}
	m.mu.Unlock()

	// Reset
	m.resetAuthFailures("pve-fail", "pve")
	m.mu.Lock()
	if _, ok := m.authFailures["pve-pve-fail"]; ok {
		t.Error("Failure count should have been deleted")
	}
	m.mu.Unlock()

	// Reach the threshold
	for i := 0; i < 5; i++ {
		m.recordAuthFailure("pve-fail", "pve")
	}

	// Should have called removeFailedPVENode, which puts a failed node in state
	nodes := m.state.GetSnapshot().Nodes
	found := false
	for _, n := range nodes {
		if n.Instance == "pve-fail" && n.ConnectionHealth == "error" {
			found = true
			break
		}
	}
	if !found {
		t.Error("Failed node not found in state after max failures")
	}
}

func TestMonitor_EvaluateAgents(t *testing.T) {
	m := &Monitor{
		state:        models.NewState(),
		alertManager: alerts.NewManager(),
	}
	defer m.alertManager.Stop()

	now := time.Now()

	// Docker host
	m.state.UpsertDockerHost(models.DockerHost{
		ID:              "d1",
		Hostname:        "docker1",
		LastSeen:        now.Add(-1 * time.Hour),
		IntervalSeconds: 60,
	})

	// Host agent
	m.state.UpsertHost(models.Host{
		ID:              "h1",
		Hostname:        "host1",
		LastSeen:        now.Add(-1 * time.Hour),
		IntervalSeconds: 60,
	})

	m.evaluateDockerAgents(now)
	m.evaluateHostAgents(now)

	for _, h := range m.state.GetDockerHosts() {
		if h.ID == "d1" && h.Status != "offline" {
			t.Errorf("Docker host should be offline, got %s", h.Status)
		}
	}

	for _, h := range m.state.GetHosts() {
		if h.ID == "h1" && h.Status != "offline" {
			t.Errorf("Host should be offline, got %s", h.Status)
		}
	}

	// Make them online
	m.state.UpsertDockerHost(models.DockerHost{
		ID:              "d1",
		Hostname:        "docker1",
		LastSeen:        now,
		IntervalSeconds: 60,
		Status:          "offline",
	})
	m.state.UpsertHost(models.Host{
		ID:              "h1",
		Hostname:        "host1",
		LastSeen:        now,
		IntervalSeconds: 60,
		Status:          "offline",
	})

	m.evaluateDockerAgents(now)
	m.evaluateHostAgents(now)

	for _, h := range m.state.GetDockerHosts() {
		if h.ID == "d1" && h.Status != "online" {
			t.Errorf("Docker host should be online, got %s", h.Status)
		}
	}

	for _, h := range m.state.GetHosts() {
		if h.ID == "h1" && h.Status != "online" {
			t.Errorf("Host should be online, got %s", h.Status)
		}
	}
}