Improve token collision handling and installer subnet support

This commit is contained in:
rcourtman 2025-11-20 09:45:36 +00:00
parent 22f9750044
commit f4e2cbca48
5 changed files with 370 additions and 19 deletions

View file

@ -626,7 +626,21 @@ export const APITokenManager: Component<APITokenManagerProps> = (props) => {
class="px-5 py-3 text-gray-600 dark:text-gray-400"
title={usageTitleSegments.length > 0 ? usageTitleSegments.join('\n') : undefined}
>
{hostSummary}
<div class="flex flex-wrap items-center gap-2">
<span>{hostSummary}</span>
<Show when={hostUsageEntry && hostUsageEntry.count > 1}>
<span class="inline-flex items-center gap-1 rounded-full bg-amber-100 px-2 py-0.5 text-[11px] font-semibold text-amber-800 dark:bg-amber-900/40 dark:text-amber-200">
<svg class="h-3 w-3" viewBox="0 0 20 20" fill="currentColor">
<path
fill-rule="evenodd"
d="M8.257 3.099c.764-1.36 2.722-1.36 3.486 0l6.518 11.62c.75 1.338-.213 3.005-1.743 3.005H3.482c-1.53 0-2.493-1.667-1.743-3.005l6.518-11.62ZM11 5a1 1 0 1 0-2 0v4.5a1 1 0 1 0 2 0V5Zm0 8a1 1 0 1 0-2 0 1 1 0 0 0 2 0Z"
clip-rule="evenodd"
/>
</svg>
Host agents sharing this token ({hostUsageEntry!.count})
</span>
</Show>
</div>
</td>
<td class="px-5 py-3 text-gray-600 dark:text-gray-400">
{formatRelativeTime(new Date(token.createdAt).getTime())}

View file

@ -152,6 +152,33 @@ export const HostAgents: Component = () => {
return [...list].sort((a, b) => (a.hostname || '').localeCompare(b.hostname || ''));
});
/**
 * Groups the connected hosts by the API token they report with.
 *
 * Keys are token IDs; each value carries how many hosts use that token and a
 * display label for every one of them (display name, then hostname, then id).
 * Hosts without a token ID are left out.
 */
const hostTokenUsage = createMemo(() => {
  type UsageHost = { id: string; label: string };
  type UsageEntry = { count: number; hosts: UsageHost[] };

  const byToken = new Map<string, UsageEntry>();
  for (const host of allHosts()) {
    const tokenId = host.tokenId;
    if (!tokenId) continue;

    const member: UsageHost = {
      id: host.id,
      label: host.displayName?.trim() || host.hostname || host.id,
    };
    const seen = byToken.get(tokenId);
    const next: UsageEntry = seen
      ? { count: seen.count + 1, hosts: seen.hosts.concat(member) }
      : { count: 1, hosts: [member] };
    byToken.set(tokenId, next);
  }
  return byToken;
});
/**
 * Lists every token that is shared by more than one host, together with the
 * hosts using it, in the order the tokens were first encountered.
 */
const reusedTokens = createMemo(() => {
  const shared: { tokenId: string; hosts: { id: string; label: string }[] }[] = [];
  for (const [tokenId, usage] of hostTokenUsage()) {
    if (usage.count <= 1) continue;
    shared.push({ tokenId, hosts: usage.hosts });
  }
  return shared;
});
const renderTags = (host: Host) => {
const tags = host.tags ?? [];
if (!tags.length) return '—';
@ -837,6 +864,56 @@ Remove-Item '$env:ProgramData\\\\Pulse\\\\pulse-host-agent.log' -Force -ErrorAct
<span class="text-sm text-gray-500 dark:text-gray-400">{allHosts().length} connected</span>
</div>
<Show when={reusedTokens().length > 0}>
<div class="rounded-lg border border-amber-300 bg-amber-50 px-4 py-3 text-sm text-amber-900 shadow-sm dark:border-amber-800 dark:bg-amber-900/20 dark:text-amber-100">
<div class="flex items-start gap-3">
<svg class="h-4 w-4 mt-0.5 text-amber-600 dark:text-amber-300" viewBox="0 0 20 20" fill="currentColor">
<path
fill-rule="evenodd"
d="M8.257 3.099c.764-1.36 2.722-1.36 3.486 0l6.518 11.62c.75 1.338-.213 3.005-1.743 3.005H3.482c-1.53 0-2.493-1.667-1.743-3.005l6.518-11.62ZM11 5a1 1 0 1 0-2 0v4.5a1 1 0 1 0 2 0V5Zm0 8a1 1 0 1 0-2 0 1 1 0 0 0 2 0Z"
clip-rule="evenodd"
/>
</svg>
<div class="space-y-1">
<p class="font-semibold">Token re-use detected across host agents.</p>
<p class="leading-snug text-sm">
Generate a new host-agent token per machine or set a unique <code class="rounded bg-amber-100 px-1 py-0.5 font-mono text-[11px] text-amber-800 dark:bg-amber-900/40 dark:text-amber-100">--agent-id</code> to stop hosts from overwriting each other.
</p>
<Show when={requiresToken()}>
<button
type="button"
class="inline-flex items-center gap-2 rounded-md bg-amber-600 px-3 py-1.5 text-xs font-semibold text-white shadow-sm transition hover:bg-amber-700 disabled:opacity-60 disabled:cursor-not-allowed dark:bg-amber-500 dark:hover:bg-amber-400"
disabled={isGeneratingToken()}
onClick={() => {
void handleGenerateToken();
}}
>
<svg class="h-3.5 w-3.5" viewBox="0 0 20 20" fill="currentColor">
<path d="M11 17a1 1 0 01-2 0v-4.086l-1.293 1.293a1 1 0 11-1.414-1.414l3-3a1 1 0 011.414 0l3 3a1 1 0 01-1.414 1.414L11 12.914V17z" />
<path d="M5 3a2 2 0 00-2 2v6a2 2 0 002 2h2a1 1 0 100-2H5V5h10v6h-2a1 1 0 100 2h2a2 2 0 002-2V5a2 2 0 00-2-2H5z" />
</svg>
{isGeneratingToken() ? 'Generating…' : 'Generate fresh token'}
</button>
</Show>
<div class="text-xs text-amber-800 dark:text-amber-200">
<For each={reusedTokens()}>
{(item) => (
<div class="flex flex-wrap gap-1 py-0.5">
<span class="rounded-full bg-amber-100 px-2 py-0.5 font-mono text-[11px] font-semibold text-amber-800 dark:bg-amber-900/50 dark:text-amber-200">
token {item.tokenId.slice(0, 6)}{item.tokenId.slice(-4)}
</span>
<span class="text-amber-900 dark:text-amber-100">
used by {item.hosts.length} hosts: {item.hosts.map((host) => host.label).join(', ')}
</span>
</div>
)}
</For>
</div>
</div>
</div>
</div>
</Show>
<Show
when={allHosts().length > 0}
fallback={
@ -881,6 +958,8 @@ Remove-Item '$env:ProgramData\\\\Pulse\\\\pulse-host-agent.log' -Force -ErrorAct
const isStale = staleness.isStale;
const tokenRevokedAt = host.tokenRevokedAt;
const tokenRevoked = typeof tokenRevokedAt === 'number';
const tokenUsageEntry = host.tokenId ? hostTokenUsage().get(host.tokenId) : undefined;
const tokenReused = tokenUsageEntry ? tokenUsageEntry.count > 1 : false;
const status = (host.status || 'unknown').toLowerCase();
const isOnline =
status === 'online' || status === 'running' || status === 'healthy';
@ -910,6 +989,18 @@ Remove-Item '$env:ProgramData\\\\Pulse\\\\pulse-host-agent.log' -Force -ErrorAct
<div class="text-xs text-gray-500 dark:text-gray-400">
{host.hostname}
</div>
<Show when={tokenReused}>
<div class="mt-1 inline-flex items-center gap-1 rounded-full bg-amber-100 px-2 py-0.5 text-[11px] font-semibold text-amber-800 dark:bg-amber-900/40 dark:text-amber-200">
<svg class="h-3 w-3" viewBox="0 0 20 20" fill="currentColor">
<path
fill-rule="evenodd"
d="M8.257 3.099c.764-1.36 2.722-1.36 3.486 0l6.518 11.62c.75 1.338-.213 3.005-1.743 3.005H3.482c-1.53 0-2.493-1.667-1.743-3.005l6.518-11.62ZM11 5a1 1 0 1 0-2 0v4.5a1 1 0 1 0 2 0V5Zm0 8a1 1 0 1 0-2 0 1 1 0 0 0 2 0Z"
clip-rule="evenodd"
/>
</svg>
Token reused ({tokenUsageEntry?.count})
</div>
</Show>
<Show when={host.agentVersion}>
<div class="text-xs text-gray-400 dark:text-gray-500 mt-1">
Agent {host.agentVersion}

View file

@ -577,6 +577,7 @@ type Monitor struct {
nodeRRDMemCache map[string]rrdMemCacheEntry
removedDockerHosts map[string]time.Time // Track deliberately removed Docker hosts (ID -> removal time)
dockerTokenBindings map[string]string // Track token ID -> agent ID bindings to enforce uniqueness
hostTokenBindings map[string]string // Track token ID -> agent ID bindings to enforce uniqueness
dockerCommands map[string]*dockerHostCommand
dockerCommandIndex map[string]string
guestMetadataMu sync.RWMutex
@ -1107,6 +1108,18 @@ func (m *Monitor) RemoveHostAgent(hostID string) (models.Host, error) {
}
}
if host.TokenID != "" {
m.mu.Lock()
if _, exists := m.hostTokenBindings[host.TokenID]; exists {
delete(m.hostTokenBindings, host.TokenID)
log.Debug().
Str("tokenID", host.TokenID).
Str("hostID", hostID).
Msg("Unbound host agent token from removed host")
}
m.mu.Unlock()
}
m.state.RemoveConnectionHealth(hostConnectionPrefix + hostID)
log.Info().
@ -1587,7 +1600,7 @@ func (m *Monitor) ApplyDockerReport(report agentsdocker.Report, tokenRecord *con
Str("dockerHostID", identifier).
Time("removedAt", removedAt).
Msg("Rejecting report from deliberately removed Docker host")
return models.DockerHost{}, fmt.Errorf("docker host %q was removed at %v and cannot report again", identifier, removedAt.Format(time.RFC3339))
return models.DockerHost{}, fmt.Errorf("docker host %q was removed at %v and cannot report again. Use Allow re-enroll in Settings -> Docker -> Removed hosts or rerun the installer with a docker:manage token to clear this block", identifier, removedAt.Format(time.RFC3339))
}
// Enforce token uniqueness: each token can only be bound to one agent
@ -1966,6 +1979,64 @@ func (m *Monitor) ApplyHostReport(report agentshost.Report, tokenRecord *config.
}
existingHosts := m.state.GetHosts()
agentID := strings.TrimSpace(report.Agent.ID)
if agentID == "" {
agentID = identifier
}
if tokenRecord != nil && tokenRecord.ID != "" {
tokenID := strings.TrimSpace(tokenRecord.ID)
bindingID := agentID
if bindingID == "" {
bindingID = identifier
}
m.mu.Lock()
if m.hostTokenBindings == nil {
m.hostTokenBindings = make(map[string]string)
}
if boundID, exists := m.hostTokenBindings[tokenID]; exists && boundID != bindingID {
m.mu.Unlock()
conflictingHost := "unknown"
for _, candidate := range existingHosts {
if candidate.TokenID == tokenID || candidate.ID == boundID {
conflictingHost = candidate.Hostname
if candidate.DisplayName != "" {
conflictingHost = candidate.DisplayName
}
break
}
}
tokenHint := tokenHintFromRecord(tokenRecord)
if tokenHint != "" {
tokenHint = " (" + tokenHint + ")"
}
log.Warn().
Str("tokenID", tokenID).
Str("tokenHint", tokenHint).
Str("reportingAgentID", bindingID).
Str("boundAgentID", boundID).
Str("conflictingHost", conflictingHost).
Msg("Rejecting host report: token already bound to different agent")
return models.Host{}, fmt.Errorf("API token%s is already in use by host %q (agent: %s). Generate a new token or set --agent-id before reusing it", tokenHint, conflictingHost, boundID)
}
if _, exists := m.hostTokenBindings[tokenID]; !exists {
m.hostTokenBindings[tokenID] = bindingID
log.Debug().
Str("tokenID", tokenID).
Str("agentID", bindingID).
Str("hostname", hostname).
Msg("Bound host agent token to agent identity")
}
m.mu.Unlock()
}
var previous models.Host
var hasPrevious bool
for _, candidate := range existingHosts {
@ -3740,6 +3811,7 @@ func New(cfg *config.Config) (*Monitor, error) {
nodeRRDMemCache: make(map[string]rrdMemCacheEntry),
removedDockerHosts: make(map[string]time.Time),
dockerTokenBindings: make(map[string]string),
hostTokenBindings: make(map[string]string),
dockerCommands: make(map[string]*dockerHostCommand),
dockerCommandIndex: make(map[string]string),
guestMetadataCache: make(map[string]guestMetadataCacheEntry),

View file

@ -5,15 +5,19 @@ import (
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
"github.com/rcourtman/pulse-go-rewrite/internal/config"
"github.com/rcourtman/pulse-go-rewrite/internal/models"
agentshost "github.com/rcourtman/pulse-go-rewrite/pkg/agents/host"
)
func TestEvaluateHostAgentsTriggersOfflineAlert(t *testing.T) {
t.Helper()
monitor := &Monitor{
state: models.NewState(),
alertManager: alerts.NewManager(),
state: models.NewState(),
alertManager: alerts.NewManager(),
hostTokenBindings: make(map[string]string),
config: &config.Config{},
}
t.Cleanup(func() { monitor.alertManager.Stop() })
@ -68,8 +72,10 @@ func TestEvaluateHostAgentsClearsAlertWhenHostReturns(t *testing.T) {
t.Helper()
monitor := &Monitor{
state: models.NewState(),
alertManager: alerts.NewManager(),
state: models.NewState(),
alertManager: alerts.NewManager(),
hostTokenBindings: make(map[string]string),
config: &config.Config{},
}
t.Cleanup(func() { monitor.alertManager.Stop() })
@ -110,3 +116,84 @@ func TestEvaluateHostAgentsClearsAlertWhenHostReturns(t *testing.T) {
}
}
}
// TestApplyHostReportRejectsTokenReuseAcrossAgents verifies that an API token
// is bound to the first host agent that reports with it, and that a report
// from a different agent ID presenting the same token is rejected without
// disturbing the existing binding.
func TestApplyHostReportRejectsTokenReuseAcrossAgents(t *testing.T) {
	monitor := &Monitor{
		state:             models.NewState(),
		alertManager:      alerts.NewManager(),
		hostTokenBindings: make(map[string]string),
		config:            &config.Config{},
	}
	t.Cleanup(func() { monitor.alertManager.Stop() })

	now := time.Now().UTC()
	baseReport := agentshost.Report{
		Agent: agentshost.AgentInfo{
			ID:              "agent-one",
			Version:         "1.0.0",
			IntervalSeconds: 30,
		},
		Host: agentshost.HostInfo{
			ID:        "machine-one",
			Hostname:  "host-one",
			Platform:  "linux",
			OSName:    "debian",
			OSVersion: "12",
		},
		Timestamp: now,
		Metrics: agentshost.Metrics{
			CPUUsagePercent: 1.0,
		},
	}
	token := &config.APITokenRecord{ID: "token-one", Name: "Token One"}

	// First report: the token is unbound, so it must be accepted and bound.
	hostOne, err := monitor.ApplyHostReport(baseReport, token)
	if err != nil {
		t.Fatalf("ApplyHostReport hostOne: %v", err)
	}
	if hostOne.ID == "" {
		t.Fatalf("expected hostOne to have an identifier")
	}
	if bound := monitor.hostTokenBindings[token.ID]; bound != "agent-one" {
		t.Fatalf("expected token to be bound to %q after first report, got %q", "agent-one", bound)
	}

	// Second report: same token, different agent identity.
	secondReport := baseReport
	secondReport.Agent.ID = "agent-two"
	secondReport.Host.ID = "machine-two"
	secondReport.Host.Hostname = "host-two"
	secondReport.Timestamp = now.Add(30 * time.Second)

	if _, err := monitor.ApplyHostReport(secondReport, token); err == nil {
		t.Fatalf("expected token reuse across agents to be rejected")
	}

	// The rejected report must not have taken over the binding.
	if bound := monitor.hostTokenBindings[token.ID]; bound != "agent-one" {
		t.Fatalf("expected binding to remain %q after rejected report, got %q", "agent-one", bound)
	}
}
// TestRemoveHostAgentUnbindsToken verifies that removing a host agent also
// drops the token binding recorded for that host's token, so the token can be
// used again afterwards.
func TestRemoveHostAgentUnbindsToken(t *testing.T) {
	monitor := &Monitor{
		state:             models.NewState(),
		alertManager:      alerts.NewManager(),
		hostTokenBindings: make(map[string]string),
		config:            &config.Config{},
	}
	t.Cleanup(func() { monitor.alertManager.Stop() })

	hostID := "host-to-remove"
	tokenID := "token-remove"

	// Seed state as if the host had already reported with this token.
	monitor.state.UpsertHost(models.Host{
		ID:       hostID,
		Hostname: "remove.me",
		TokenID:  tokenID,
	})
	monitor.hostTokenBindings[tokenID] = "agent-remove"

	if _, err := monitor.RemoveHostAgent(hostID); err != nil {
		t.Fatalf("RemoveHostAgent: %v", err)
	}

	if _, exists := monitor.hostTokenBindings[tokenID]; exists {
		t.Fatalf("expected token binding to be cleared after host removal")
	}
}

View file

@ -130,6 +130,69 @@ clear_pending_control_plane() {
rm -f "$PENDING_CONTROL_PLANE_FILE" 2>/dev/null || true
}
# format_ip_to_cidr ADDRESS
#
# Writes ADDRESS to stdout in CIDR notation (no trailing newline):
#   - empty input              -> no output
#   - already contains a "/"   -> printed unchanged
#   - contains a ":" (IPv6)    -> suffixed with /128
#   - anything else (IPv4)     -> suffixed with /32
format_ip_to_cidr() {
    local addr="$1"
    case "$addr" in
        '')  return 0 ;;
        */*) printf '%s' "$addr" ;;
        *:*) printf '%s/128' "$addr" ;;
        *)   printf '%s/32' "$addr" ;;
    esac
}
# ensure_allowed_source_subnet SUBNET
#
# Idempotently adds SUBNET to the top-level allowed_source_subnets list in
# $CONFIG_FILE. Does nothing when SUBNET is empty, when $CONFIG_FILE does not
# exist, or when an identical list entry is already present.
#
# When the key exists, the entry is added at the end of its block, reusing the
# indentation of the existing items so the YAML list stays consistent. When
# the key is missing, a new block is appended to the end of the file.
#
# Best effort: failures are reported through print_warn and the function still
# returns 0 so the installer can continue.
ensure_allowed_source_subnet() {
    local subnet="$1"
    if [[ -z "$subnet" || ! -f "$CONFIG_FILE" ]]; then
        return 0
    fi

    # Look for an existing "- SUBNET" list item. The comparison is a literal
    # string match, so the dots in an address are not treated as regex
    # wildcards (10x0x0x1/32 is not mistaken for 10.0.0.1/32).
    if awk -v subnet="$subnet" '
        /^[[:space:]]+-[[:space:]]*/ {
            item = $0
            sub(/^[[:space:]]+-[[:space:]]*/, "", item)
            sub(/[[:space:]].*$/, "", item)
            if ((item "") == (subnet "")) { found = 1; exit }
        }
        END { exit (found ? 0 : 1) }
    ' "$CONFIG_FILE"; then
        return 0
    fi

    local tmp
    if ! tmp=$(mktemp); then
        print_warn "Failed to update allowed_source_subnets with ${subnet}"
        return 0
    fi

    # A single awk pass handles both cases (key present / key missing), so the
    # decision and the edit can never disagree. The result is written back
    # with a redirect rather than mv so the config keeps its owner and mode.
    if awk -v subnet="$subnet" -v indent=" " '
        # Header of the first top-level allowed_source_subnets block.
        /^allowed_source_subnets:/ && !seen { print; seen = 1; in_block = 1; next }
        # The first non-indented line after the header closes the block.
        in_block && /^[^[:space:]]/ {
            printf("%s- %s\n", indent, subnet)
            in_block = 0
        }
        # Remember how the existing items are indented.
        in_block && !have_indent && match($0, /^[[:space:]]+-/) {
            indent = substr($0, 1, RLENGTH - 1)
            have_indent = 1
        }
        { print }
        END {
            if (in_block) {
                # The block ran to the end of the file.
                printf("%s- %s\n", indent, subnet)
            } else if (!seen) {
                # No such key yet: append a fresh block.
                printf("\nallowed_source_subnets:\n%s- %s\n", indent, subnet)
            }
        }
    ' "$CONFIG_FILE" > "$tmp" && cat "$tmp" > "$CONFIG_FILE"; then
        print_info "Added allowed_source_subnets entry ${subnet}"
    else
        print_warn "Failed to update allowed_source_subnets with ${subnet}"
    fi
    rm -f "$tmp"
}
configure_local_authorized_key() {
local auth_line=$1
@ -1833,10 +1896,10 @@ if [[ "$HTTP_MODE" == true ]]; then
chown pulse-sensor-proxy:pulse-sensor-proxy /etc/pulse-sensor-proxy/.http-auth-token
# Backup config and token files before modifying
if [[ -f /etc/pulse-sensor-proxy/config.yaml ]]; then
if [[ -f "$CONFIG_FILE" ]]; then
BACKUP_TIMESTAMP="$(date +%s)"
BACKUP_CONFIG="/etc/pulse-sensor-proxy/config.yaml.backup.$BACKUP_TIMESTAMP"
cp /etc/pulse-sensor-proxy/config.yaml "$BACKUP_CONFIG"
BACKUP_CONFIG="${CONFIG_FILE}.backup.$BACKUP_TIMESTAMP"
cp "$CONFIG_FILE" "$BACKUP_CONFIG"
print_info "Config backed up to: $BACKUP_CONFIG"
# Also backup token files so rollback restores matching secrets
@ -1846,10 +1909,10 @@ if [[ "$HTTP_MODE" == true ]]; then
fi
# Remove any existing HTTP configuration to prevent duplicates
if grep -q "^# HTTP Mode Configuration" /etc/pulse-sensor-proxy/config.yaml; then
if grep -q "^# HTTP Mode Configuration" "$CONFIG_FILE"; then
print_info "Removing existing HTTP configuration..."
# Remove from "# HTTP Mode Configuration" to end of file
sed -i '/^# HTTP Mode Configuration/,$ d' /etc/pulse-sensor-proxy/config.yaml
sed -i '/^# HTTP Mode Configuration/,$ d' "$CONFIG_FILE"
fi
fi
@ -1866,15 +1929,38 @@ if [[ "$HTTP_MODE" == true ]]; then
print_info "Pulse server detected at: $PULSE_IP"
HTTP_ALLOWED_SUBNETS=()
PULSE_HTTP_SUBNET="$(format_ip_to_cidr "$PULSE_IP")"
LOCAL_HTTP_SUBNET="$(format_ip_to_cidr "$PRIMARY_IP")"
LOOPBACK_HTTP_SUBNET="127.0.0.1/32"
[[ -n "$PULSE_HTTP_SUBNET" ]] && HTTP_ALLOWED_SUBNETS+=("$PULSE_HTTP_SUBNET")
HTTP_ALLOWED_SUBNETS+=("$LOOPBACK_HTTP_SUBNET")
[[ -n "$LOCAL_HTTP_SUBNET" ]] && HTTP_ALLOWED_SUBNETS+=("$LOCAL_HTTP_SUBNET")
declare -A HTTP_SUBNET_SEEN=()
deduped_http_subnets=()
for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do
[[ -z "$subnet" ]] && continue
if [[ -z "${HTTP_SUBNET_SEEN[$subnet]+x}" ]]; then
HTTP_SUBNET_SEEN[$subnet]=1
deduped_http_subnets+=("$subnet")
fi
done
HTTP_ALLOWED_SUBNETS=("${deduped_http_subnets[@]}")
# Configure HTTP mode - check if already configured to avoid duplicates
print_info "Configuring HTTP mode..."
if grep -q "^http_enabled:" /etc/pulse-sensor-proxy/config.yaml 2>/dev/null; then
if grep -q "^http_enabled:" "$CONFIG_FILE" 2>/dev/null; then
# HTTP mode already configured - only update the token (avoid duplicates)
sed -i "s|^http_auth_token:.*|http_auth_token: $HTTP_AUTH_TOKEN|" /etc/pulse-sensor-proxy/config.yaml
sed -i "s|^http_auth_token:.*|http_auth_token: $HTTP_AUTH_TOKEN|" "$CONFIG_FILE"
for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do
ensure_allowed_source_subnet "$subnet"
done
print_info "Updated HTTP auth token (existing HTTP mode configuration kept)"
else
# Fresh HTTP mode configuration - append to file
cat >> /etc/pulse-sensor-proxy/config.yaml << EOF
cat >> "$CONFIG_FILE" << EOF
# HTTP Mode Configuration (External PVE Host)
http_enabled: true
@ -1883,14 +1969,15 @@ http_tls_cert: /etc/pulse-sensor-proxy/tls/server.crt
http_tls_key: /etc/pulse-sensor-proxy/tls/server.key
http_auth_token: "$HTTP_AUTH_TOKEN"
# Allow HTTP connections from Pulse server and localhost (for self-monitoring)
# Allow HTTP connections from Pulse server, localhost, and this host
allowed_source_subnets:
- $PULSE_IP/32
- 127.0.0.1/32
EOF
for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do
echo " - $subnet" >> "$CONFIG_FILE"
done
fi
chown pulse-sensor-proxy:pulse-sensor-proxy /etc/pulse-sensor-proxy/config.yaml
chmod 0644 /etc/pulse-sensor-proxy/config.yaml
chown pulse-sensor-proxy:pulse-sensor-proxy "$CONFIG_FILE"
chmod 0644 "$CONFIG_FILE"
print_success "HTTP mode configured successfully"
echo ""