From f4e2cbca48ab828710f05df6f86a7fd98ecf5ebe Mon Sep 17 00:00:00 2001 From: rcourtman Date: Thu, 20 Nov 2025 09:45:36 +0000 Subject: [PATCH] Improve token collision handling and installer subnet support --- .../components/Settings/APITokenManager.tsx | 16 ++- .../src/components/Settings/HostAgents.tsx | 91 ++++++++++++++ internal/monitoring/monitor.go | 74 +++++++++++- .../monitoring/monitor_host_agents_test.go | 95 ++++++++++++++- scripts/install-sensor-proxy.sh | 113 ++++++++++++++++-- 5 files changed, 370 insertions(+), 19 deletions(-) diff --git a/frontend-modern/src/components/Settings/APITokenManager.tsx b/frontend-modern/src/components/Settings/APITokenManager.tsx index f75d5e894..4bb203dd4 100644 --- a/frontend-modern/src/components/Settings/APITokenManager.tsx +++ b/frontend-modern/src/components/Settings/APITokenManager.tsx @@ -626,7 +626,21 @@ export const APITokenManager: Component = (props) => { class="px-5 py-3 text-gray-600 dark:text-gray-400" title={usageTitleSegments.length > 0 ? usageTitleSegments.join('\n') : undefined} > - {hostSummary} +
+ {hostSummary} + 1}> + + + + + Host agents sharing this token ({hostUsageEntry!.count}) + + +
{formatRelativeTime(new Date(token.createdAt).getTime())} diff --git a/frontend-modern/src/components/Settings/HostAgents.tsx b/frontend-modern/src/components/Settings/HostAgents.tsx index dd33f2693..ef749a4da 100644 --- a/frontend-modern/src/components/Settings/HostAgents.tsx +++ b/frontend-modern/src/components/Settings/HostAgents.tsx @@ -152,6 +152,33 @@ export const HostAgents: Component = () => { return [...list].sort((a, b) => (a.hostname || '').localeCompare(b.hostname || '')); }); + const hostTokenUsage = createMemo(() => { + type UsageHost = { id: string; label: string }; + const usage = new Map(); + for (const host of allHosts()) { + const tokenId = host.tokenId; + if (!tokenId) continue; + const label = host.displayName?.trim() || host.hostname || host.id; + const prev = usage.get(tokenId); + if (prev) { + usage.set(tokenId, { count: prev.count + 1, hosts: [...prev.hosts, { id: host.id, label }] }); + } else { + usage.set(tokenId, { count: 1, hosts: [{ id: host.id, label }] }); + } + } + return usage; + }); + + const reusedTokens = createMemo(() => { + const entries: { tokenId: string; hosts: { id: string; label: string }[] }[] = []; + hostTokenUsage().forEach((value, tokenId) => { + if (value.count > 1) { + entries.push({ tokenId, hosts: value.hosts }); + } + }); + return entries; + }); + const renderTags = (host: Host) => { const tags = host.tags ?? []; if (!tags.length) return '—'; @@ -837,6 +864,56 @@ Remove-Item '$env:ProgramData\\\\Pulse\\\\pulse-host-agent.log' -Force -ErrorAct {allHosts().length} connected + 0}> +
+
+ + + +
+

Token re-use detected across host agents.

+

+ Generate a new host-agent token per machine or set a unique --agent-id to stop hosts from overwriting each other. +

+ + + +
+ + {(item) => ( +
+ + token {item.tokenId.slice(0, 6)}…{item.tokenId.slice(-4)} + + + used by {item.hosts.length} hosts: {item.hosts.map((host) => host.label).join(', ')} + +
+ )} +
+
+
+
+
+
+ 0} fallback={ @@ -881,6 +958,8 @@ Remove-Item '$env:ProgramData\\\\Pulse\\\\pulse-host-agent.log' -Force -ErrorAct const isStale = staleness.isStale; const tokenRevokedAt = host.tokenRevokedAt; const tokenRevoked = typeof tokenRevokedAt === 'number'; + const tokenUsageEntry = host.tokenId ? hostTokenUsage().get(host.tokenId) : undefined; + const tokenReused = tokenUsageEntry ? tokenUsageEntry.count > 1 : false; const status = (host.status || 'unknown').toLowerCase(); const isOnline = status === 'online' || status === 'running' || status === 'healthy'; @@ -910,6 +989,18 @@ Remove-Item '$env:ProgramData\\\\Pulse\\\\pulse-host-agent.log' -Force -ErrorAct
{host.hostname}
+ +
+ + + + Token reused ({tokenUsageEntry?.count}) +
+
Agent {host.agentVersion} diff --git a/internal/monitoring/monitor.go b/internal/monitoring/monitor.go index 8a2449ee1..49a45ef5b 100644 --- a/internal/monitoring/monitor.go +++ b/internal/monitoring/monitor.go @@ -577,6 +577,7 @@ type Monitor struct { nodeRRDMemCache map[string]rrdMemCacheEntry removedDockerHosts map[string]time.Time // Track deliberately removed Docker hosts (ID -> removal time) dockerTokenBindings map[string]string // Track token ID -> agent ID bindings to enforce uniqueness + hostTokenBindings map[string]string // Track token ID -> agent ID bindings to enforce uniqueness dockerCommands map[string]*dockerHostCommand dockerCommandIndex map[string]string guestMetadataMu sync.RWMutex @@ -1107,6 +1108,18 @@ func (m *Monitor) RemoveHostAgent(hostID string) (models.Host, error) { } } + if host.TokenID != "" { + m.mu.Lock() + if _, exists := m.hostTokenBindings[host.TokenID]; exists { + delete(m.hostTokenBindings, host.TokenID) + log.Debug(). + Str("tokenID", host.TokenID). + Str("hostID", hostID). + Msg("Unbound host agent token from removed host") + } + m.mu.Unlock() + } + m.state.RemoveConnectionHealth(hostConnectionPrefix + hostID) log.Info(). @@ -1587,7 +1600,7 @@ func (m *Monitor) ApplyDockerReport(report agentsdocker.Report, tokenRecord *con Str("dockerHostID", identifier). Time("removedAt", removedAt). Msg("Rejecting report from deliberately removed Docker host") - return models.DockerHost{}, fmt.Errorf("docker host %q was removed at %v and cannot report again", identifier, removedAt.Format(time.RFC3339)) + return models.DockerHost{}, fmt.Errorf("docker host %q was removed at %v and cannot report again. Use Allow re-enroll in Settings -> Docker -> Removed hosts or rerun the installer with a docker:manage token to clear this block", identifier, removedAt.Format(time.RFC3339)) } // Enforce token uniqueness: each token can only be bound to one agent @@ -1966,6 +1979,64 @@ func (m *Monitor) ApplyHostReport(report agentshost.Report, tokenRecord *config. } existingHosts := m.state.GetHosts() + + agentID := strings.TrimSpace(report.Agent.ID) + if agentID == "" { + agentID = identifier + } + + if tokenRecord != nil && tokenRecord.ID != "" { + tokenID := strings.TrimSpace(tokenRecord.ID) + bindingID := agentID + if bindingID == "" { + bindingID = identifier + } + + m.mu.Lock() + if m.hostTokenBindings == nil { + m.hostTokenBindings = make(map[string]string) + } + if boundID, exists := m.hostTokenBindings[tokenID]; exists && boundID != bindingID { + m.mu.Unlock() + + conflictingHost := "unknown" + for _, candidate := range existingHosts { + if candidate.TokenID == tokenID || candidate.ID == boundID { + conflictingHost = candidate.Hostname + if candidate.DisplayName != "" { + conflictingHost = candidate.DisplayName + } + break + } + } + + tokenHint := tokenHintFromRecord(tokenRecord) + if tokenHint != "" { + tokenHint = " (" + tokenHint + ")" + } + + log.Warn(). + Str("tokenID", tokenID). + Str("tokenHint", tokenHint). + Str("reportingAgentID", bindingID). + Str("boundAgentID", boundID). + Str("conflictingHost", conflictingHost). + Msg("Rejecting host report: token already bound to different agent") + + return models.Host{}, fmt.Errorf("API token%s is already in use by host %q (agent: %s). Generate a new token or set --agent-id before reusing it", tokenHint, conflictingHost, boundID) + } + + if _, exists := m.hostTokenBindings[tokenID]; !exists { + m.hostTokenBindings[tokenID] = bindingID + log.Debug(). + Str("tokenID", tokenID). + Str("agentID", bindingID). + Str("hostname", hostname).
+ Msg("Bound host agent token to agent identity") + } + m.mu.Unlock() + } + var previous models.Host var hasPrevious bool for _, candidate := range existingHosts { @@ -3740,6 +3811,7 @@ func New(cfg *config.Config) (*Monitor, error) { nodeRRDMemCache: make(map[string]rrdMemCacheEntry), removedDockerHosts: make(map[string]time.Time), dockerTokenBindings: make(map[string]string), + hostTokenBindings: make(map[string]string), dockerCommands: make(map[string]*dockerHostCommand), dockerCommandIndex: make(map[string]string), guestMetadataCache: make(map[string]guestMetadataCacheEntry), diff --git a/internal/monitoring/monitor_host_agents_test.go b/internal/monitoring/monitor_host_agents_test.go index ffcfc78a9..c38e43c84 100644 --- a/internal/monitoring/monitor_host_agents_test.go +++ b/internal/monitoring/monitor_host_agents_test.go @@ -5,15 +5,19 @@ import ( "time" "github.com/rcourtman/pulse-go-rewrite/internal/alerts" + "github.com/rcourtman/pulse-go-rewrite/internal/config" "github.com/rcourtman/pulse-go-rewrite/internal/models" + agentshost "github.com/rcourtman/pulse-go-rewrite/pkg/agents/host" ) func TestEvaluateHostAgentsTriggersOfflineAlert(t *testing.T) { t.Helper() monitor := &Monitor{ - state: models.NewState(), - alertManager: alerts.NewManager(), + state: models.NewState(), + alertManager: alerts.NewManager(), + hostTokenBindings: make(map[string]string), + config: &config.Config{}, } t.Cleanup(func() { monitor.alertManager.Stop() }) @@ -68,8 +72,10 @@ func TestEvaluateHostAgentsClearsAlertWhenHostReturns(t *testing.T) { t.Helper() monitor := &Monitor{ - state: models.NewState(), - alertManager: alerts.NewManager(), + state: models.NewState(), + alertManager: alerts.NewManager(), + hostTokenBindings: make(map[string]string), + config: &config.Config{}, } t.Cleanup(func() { monitor.alertManager.Stop() }) @@ -110,3 +116,84 @@ } } } + +func TestApplyHostReportRejectsTokenReuseAcrossAgents(t *testing.T) { + t.Helper() + + monitor := &Monitor{ + state: models.NewState(), + alertManager: alerts.NewManager(), + hostTokenBindings: make(map[string]string), + config: &config.Config{}, + } + t.Cleanup(func() { monitor.alertManager.Stop() }) + + now := time.Now().UTC() + baseReport := agentshost.Report{ + Agent: agentshost.AgentInfo{ + ID: "agent-one", + Version: "1.0.0", + IntervalSeconds: 30, + }, + Host: agentshost.HostInfo{ + ID: "machine-one", + Hostname: "host-one", + Platform: "linux", + OSName: "debian", + OSVersion: "12", + }, + Timestamp: now, + Metrics: agentshost.Metrics{ + CPUUsagePercent: 1.0, + }, + } + + token := &config.APITokenRecord{ID: "token-one", Name: "Token One"} + + hostOne, err := monitor.ApplyHostReport(baseReport, token) + if err != nil { + t.Fatalf("ApplyHostReport hostOne: %v", err) + } + if hostOne.ID == "" { + t.Fatalf("expected hostOne to have an identifier") + } + + secondReport := baseReport + secondReport.Agent.ID = "agent-two" + secondReport.Host.ID = "machine-two" + secondReport.Host.Hostname = "host-two" + secondReport.Timestamp = now.Add(30 * time.Second) + + if _, err := monitor.ApplyHostReport(secondReport, token); err == nil { + t.Fatalf("expected token reuse across agents to be rejected") + } +} + +func TestRemoveHostAgentUnbindsToken(t *testing.T) { + t.Helper() + + monitor := &Monitor{ + state: models.NewState(), + alertManager: alerts.NewManager(), + hostTokenBindings: make(map[string]string), + config: &config.Config{}, + } + t.Cleanup(func() { monitor.alertManager.Stop() }) + + hostID := "host-to-remove" + tokenID := "token-remove" + monitor.state.UpsertHost(models.Host{ + ID: hostID, + Hostname: "remove.me", + TokenID: tokenID, + }) + monitor.hostTokenBindings[tokenID] = "agent-remove" + + if _, err := monitor.RemoveHostAgent(hostID); err != nil { + t.Fatalf("RemoveHostAgent: %v", err) + } + + if _, exists := monitor.hostTokenBindings[tokenID]; exists { + t.Fatalf("expected token binding to be cleared after host removal") + } +} diff --git a/scripts/install-sensor-proxy.sh b/scripts/install-sensor-proxy.sh index 7f52692b4..20ce1c334 100755 --- a/scripts/install-sensor-proxy.sh +++ b/scripts/install-sensor-proxy.sh @@ -130,6 +130,69 @@ clear_pending_control_plane() { rm -f "$PENDING_CONTROL_PLANE_FILE" 2>/dev/null || true } +format_ip_to_cidr() { + local ip="$1" + if [[ -z "$ip" ]]; then + return + fi + + if [[ "$ip" == */* ]]; then + printf '%s' "$ip" + return + fi + + if [[ "$ip" == *:* ]]; then + printf '%s/128' "$ip" + else + printf '%s/32' "$ip" + fi +} + +ensure_allowed_source_subnet() { + local subnet="$1" + if [[ -z "$subnet" || ! -f "$CONFIG_FILE" ]]; then + return + fi + + local escaped_subnet="${subnet//\//\\/}" + if grep -Eq "^[[:space:]]+-[[:space:]]*${escaped_subnet}([[:space:]]|$)" "$CONFIG_FILE"; then + return + fi + + local tmp + tmp=$(mktemp) + + if grep -Eq "^[[:space:]]*allowed_source_subnets:" "$CONFIG_FILE"; then + awk -v subnet="$subnet" ' +/^allowed_source_subnets:/ {print; in_block=1; next} +in_block && /^[^[:space:]]/ { + if (!added) { printf(" - %s\n", subnet); added=1 } + in_block=0 +} +{print} +END { + if (in_block && !added) { + printf(" - %s\n", subnet) + } +} +' "$CONFIG_FILE" > "$tmp" + else + cat "$CONFIG_FILE" > "$tmp" + { + echo "" + echo "allowed_source_subnets:" + echo " - $subnet" + } >> "$tmp" + fi + + if mv "$tmp" "$CONFIG_FILE"; then + print_info "Added allowed_source_subnets entry ${subnet}" + else + rm -f "$tmp" + print_warn "Failed to update allowed_source_subnets with ${subnet}" + fi +} + configure_local_authorized_key() { local auth_line=$1 @@ -1833,10 +1896,10 @@ if [[ "$HTTP_MODE" == true ]]; then chown pulse-sensor-proxy:pulse-sensor-proxy /etc/pulse-sensor-proxy/.http-auth-token # Backup config and token files before modifying - if [[ -f /etc/pulse-sensor-proxy/config.yaml ]]; then + if [[ -f "$CONFIG_FILE" ]]; then BACKUP_TIMESTAMP="$(date +%s)" - BACKUP_CONFIG="/etc/pulse-sensor-proxy/config.yaml.backup.$BACKUP_TIMESTAMP" - cp /etc/pulse-sensor-proxy/config.yaml "$BACKUP_CONFIG" + BACKUP_CONFIG="${CONFIG_FILE}.backup.$BACKUP_TIMESTAMP" + cp "$CONFIG_FILE" "$BACKUP_CONFIG" print_info "Config backed up to: $BACKUP_CONFIG" # Also backup token files so rollback restores matching secrets @@ -1846,10 +1909,10 @@ if [[ "$HTTP_MODE" == true ]]; then fi # Remove any existing HTTP configuration to prevent duplicates - if grep -q "^# HTTP Mode Configuration" /etc/pulse-sensor-proxy/config.yaml; then + if grep -q "^# HTTP Mode Configuration" "$CONFIG_FILE"; then print_info "Removing existing HTTP configuration..." # Remove from "# HTTP Mode Configuration" to end of file - sed -i '/^# HTTP Mode Configuration/,$ d' /etc/pulse-sensor-proxy/config.yaml + sed -i '/^# HTTP Mode Configuration/,$ d' "$CONFIG_FILE" fi fi @@ -1866,15 +1929,38 @@ if [[ "$HTTP_MODE" == true ]]; then print_info "Pulse server detected at: $PULSE_IP" + HTTP_ALLOWED_SUBNETS=() + PULSE_HTTP_SUBNET="$(format_ip_to_cidr "$PULSE_IP")" + LOCAL_HTTP_SUBNET="$(format_ip_to_cidr "$PRIMARY_IP")" + LOOPBACK_HTTP_SUBNET="127.0.0.1/32" + + [[ -n "$PULSE_HTTP_SUBNET" ]] && HTTP_ALLOWED_SUBNETS+=("$PULSE_HTTP_SUBNET") + HTTP_ALLOWED_SUBNETS+=("$LOOPBACK_HTTP_SUBNET") + [[ -n "$LOCAL_HTTP_SUBNET" ]] && HTTP_ALLOWED_SUBNETS+=("$LOCAL_HTTP_SUBNET") + + declare -A HTTP_SUBNET_SEEN=() + deduped_http_subnets=() + for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do + [[ -z "$subnet" ]] && continue + if [[ -z "${HTTP_SUBNET_SEEN[$subnet]+x}" ]]; then + HTTP_SUBNET_SEEN[$subnet]=1 + deduped_http_subnets+=("$subnet") + fi + done + HTTP_ALLOWED_SUBNETS=("${deduped_http_subnets[@]}") + # Configure HTTP mode - check if already configured to avoid duplicates print_info "Configuring HTTP mode..."
- if grep -q "^http_enabled:" /etc/pulse-sensor-proxy/config.yaml 2>/dev/null; then + if grep -q "^http_enabled:" "$CONFIG_FILE" 2>/dev/null; then # HTTP mode already configured - only update the token (avoid duplicates) - sed -i "s|^http_auth_token:.*|http_auth_token: $HTTP_AUTH_TOKEN|" /etc/pulse-sensor-proxy/config.yaml + sed -i "s|^http_auth_token:.*|http_auth_token: $HTTP_AUTH_TOKEN|" "$CONFIG_FILE" + for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do + ensure_allowed_source_subnet "$subnet" + done print_info "Updated HTTP auth token (existing HTTP mode configuration kept)" else # Fresh HTTP mode configuration - append to file - cat >> /etc/pulse-sensor-proxy/config.yaml << EOF + cat >> "$CONFIG_FILE" << EOF # HTTP Mode Configuration (External PVE Host) http_enabled: true @@ -1883,14 +1969,15 @@ http_tls_cert: /etc/pulse-sensor-proxy/tls/server.crt http_tls_key: /etc/pulse-sensor-proxy/tls/server.key http_auth_token: "$HTTP_AUTH_TOKEN" -# Allow HTTP connections from Pulse server and localhost (for self-monitoring) +# Allow HTTP connections from Pulse server, localhost, and this host allowed_source_subnets: - - $PULSE_IP/32 - - 127.0.0.1/32 EOF + for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do + echo " - $subnet" >> "$CONFIG_FILE" + done fi - chown pulse-sensor-proxy:pulse-sensor-proxy /etc/pulse-sensor-proxy/config.yaml - chmod 0644 /etc/pulse-sensor-proxy/config.yaml + chown pulse-sensor-proxy:pulse-sensor-proxy "$CONFIG_FILE" + chmod 0644 "$CONFIG_FILE" print_success "HTTP mode configured successfully" echo ""