Select reachable Proxmox auto-register hosts

This commit is contained in:
rcourtman 2026-04-01 13:17:21 +01:00
parent 65118b4fc4
commit cc756f5d2f
4 changed files with 280 additions and 64 deletions

View file

@ -5322,14 +5322,15 @@ func (h *ConfigHandlers) HandleUpdateMockMode(w http.ResponseWriter, r *http.Req
// AutoRegisterRequest represents a request from the setup script or agent to auto-register a node
type AutoRegisterRequest struct {
Type string `json:"type"` // "pve" or "pbs"
Host string `json:"host"` // The host URL
TokenID string `json:"tokenId"` // Full token ID like pulse-monitor@pam!pulse-token
TokenValue string `json:"tokenValue,omitempty"` // The token value for the node
ServerName string `json:"serverName"` // Hostname or IP
SetupCode string `json:"setupCode,omitempty"` // One-time setup code for authentication (deprecated)
AuthToken string `json:"authToken,omitempty"` // Direct auth token from URL (new approach)
Source string `json:"source,omitempty"` // "agent" or "script" - indicates how the node was registered
Type string `json:"type"` // "pve" or "pbs"
Host string `json:"host"` // The preferred host URL
CandidateHosts []string `json:"candidateHosts,omitempty"` // Alternate host URLs the server can try from its own network view
TokenID string `json:"tokenId"` // Full token ID like pulse-monitor@pam!pulse-token
TokenValue string `json:"tokenValue,omitempty"` // The token value for the node
ServerName string `json:"serverName"` // Hostname or IP
SetupCode string `json:"setupCode,omitempty"` // One-time setup code for authentication (deprecated)
AuthToken string `json:"authToken,omitempty"` // Direct auth token from URL (new approach)
Source string `json:"source,omitempty"` // "agent" or "script" - indicates how the node was registered
// CheckRegistration asks Pulse whether this Proxmox type already exists in config.
// Used by agents to validate stale local registration marker files after server reinstalls.
CheckRegistration bool `json:"checkRegistration,omitempty"`
@ -5339,6 +5340,73 @@ type AutoRegisterRequest struct {
Password string `json:"password,omitempty"` // Password for authentication (never stored)
}
func normalizeAutoRegisterHostCandidates(nodeType, primary string, alternates []string) ([]string, error) {
candidates := make([]string, 0, len(alternates)+1)
seen := make(map[string]struct{}, len(alternates)+1)
addCandidate := func(raw string) error {
if strings.TrimSpace(raw) == "" {
return nil
}
normalized, err := normalizeNodeHost(raw, nodeType)
if err != nil {
return err
}
if _, exists := seen[normalized]; exists {
return nil
}
seen[normalized] = struct{}{}
candidates = append(candidates, normalized)
return nil
}
if err := addCandidate(primary); err != nil {
return nil, err
}
for _, candidate := range alternates {
if err := addCandidate(candidate); err != nil {
log.Debug().
Str("type", nodeType).
Str("candidate", candidate).
Err(err).
Msg("Ignoring invalid auto-register host candidate")
}
}
if len(candidates) == 0 {
return nil, fmt.Errorf("host is required")
}
return candidates, nil
}
func selectAutoRegisterHost(nodeType, primary string, alternates []string) (string, string, bool, []string, error) {
candidates, err := normalizeAutoRegisterHostCandidates(nodeType, primary, alternates)
if err != nil {
return "", "", false, nil, err
}
for idx, candidate := range candidates {
fingerprint, err := tlsutil.FetchFingerprint(candidate)
if err != nil {
log.Debug().
Str("type", nodeType).
Str("candidate", candidate).
Err(err).
Msg("Auto-register candidate not reachable for fingerprint capture")
continue
}
if idx > 0 {
log.Info().
Str("type", nodeType).
Str("selectedHost", candidate).
Str("requestedHost", candidates[0]).
Msg("Auto-register switched to fallback host candidate reachable from Pulse")
}
return candidate, fingerprint, true, candidates, nil
}
return candidates[0], "", false, candidates, nil
}
func autoRegisterNodeMatchesHost(nodeHost, requestedHost string) bool {
nodeHost = strings.TrimSpace(nodeHost)
requestedHost = strings.TrimSpace(requestedHost)
@ -5369,16 +5437,30 @@ func (h *ConfigHandlers) autoRegisteredNodeExists(ctx context.Context, req *Auto
return false
}
candidates, err := normalizeAutoRegisterHostCandidates(req.Type, req.Host, req.CandidateHosts)
if err != nil {
return false
}
matchesAny := func(nodeHost string) bool {
for _, candidate := range candidates {
if autoRegisterNodeMatchesHost(nodeHost, candidate) {
return true
}
}
return false
}
switch strings.ToLower(strings.TrimSpace(req.Type)) {
case "pve":
for _, node := range h.getConfig(ctx).PVEInstances {
if autoRegisterNodeMatchesHost(node.Host, req.Host) {
if matchesAny(node.Host) {
return true
}
}
case "pbs":
for _, node := range h.getConfig(ctx).PBSInstances {
if autoRegisterNodeMatchesHost(node.Host, req.Host) {
if matchesAny(node.Host) {
return true
}
}
@ -5649,23 +5731,15 @@ func (h *ConfigHandlers) HandleAutoRegister(w http.ResponseWriter, r *http.Reque
return
}
host, err := normalizeNodeHost(req.Host, req.Type)
host, fingerprint, verifySSL, candidateHosts, err := selectAutoRegisterHost(req.Type, req.Host, req.CandidateHosts)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
fingerprint := ""
if fp, err := tlsutil.FetchFingerprint(host); err != nil {
log.Warn().Err(err).Str("host", host).Msg("Failed to fetch TLS fingerprint for auto-register")
} else {
fingerprint = fp
}
// Create a node configuration
boolFalse := false
boolTrue := true
verifySSL := fingerprint != "" // Only enforce strict TLS when we have a fingerprint to verify against
nodeConfig := NodeConfigRequest{
Type: req.Type,
Name: req.ServerName,
@ -5685,6 +5759,13 @@ func (h *ConfigHandlers) HandleAutoRegister(w http.ResponseWriter, r *http.Reque
MonitorGarbageJobs: &boolFalse,
}
log.Info().
Str("type", req.Type).
Str("selectedHost", host).
Strs("candidateHosts", candidateHosts).
Bool("verifySSL", verifySSL).
Msg("Resolved auto-register host candidates")
// Check if a node with this host already exists
// IMPORTANT: Match by Host URL primarily.
// Also match by name+tokenID for DHCP scenarios where IP changed but it's the same host.
@ -6200,7 +6281,7 @@ func (h *ConfigHandlers) handleSecureAutoRegister(w http.ResponseWriter, r *http
Str("username", req.Username).
Msg("Processing secure auto-register request")
host, err := normalizeNodeHost(req.Host, req.Type)
host, fingerprint, verifySSL, candidateHosts, err := selectAutoRegisterHost(req.Type, req.Host, req.CandidateHosts)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
@ -6219,13 +6300,12 @@ func (h *ConfigHandlers) handleSecureAutoRegister(w http.ResponseWriter, r *http
tokenValue := fmt.Sprintf("%x-%x-%x-%x-%x",
tokenBytes[0:4], tokenBytes[4:6], tokenBytes[6:8], tokenBytes[8:10], tokenBytes[10:16])
fingerprint := ""
if fp, err := tlsutil.FetchFingerprint(host); err != nil {
log.Warn().Err(err).Str("host", host).Msg("Failed to fetch TLS fingerprint for auto-register")
} else {
fingerprint = fp
}
verifySSL := fingerprint != "" // Only enforce strict TLS when we have a fingerprint to verify against
log.Info().
Str("type", req.Type).
Str("selectedHost", host).
Strs("candidateHosts", candidateHosts).
Bool("verifySSL", verifySSL).
Msg("Resolved secure auto-register host candidates")
existingTokenID := ""
existingTokenValue := ""

View file

@ -1277,3 +1277,79 @@ func TestHandleAutoRegisterUpdateDoesNotClearFingerprintWhenFetchFails(t *testin
t.Fatalf("expected existing fingerprint to be preserved, got %q", cfg.PVEInstances[0].Fingerprint)
}
}
func TestHandleAutoRegisterSelectsReachableFallbackCandidateHost(t *testing.T) {
tempDir := t.TempDir()
t.Setenv("PULSE_DATA_DIR", tempDir)
originalDetectPVECluster := detectPVECluster
detectPVECluster = func(clientConfig proxmox.ClientConfig, nodeName string, existingEndpoints []config.ClusterEndpoint) (bool, string, []config.ClusterEndpoint) {
return false, "", nil
}
defer func() { detectPVECluster = originalDetectPVECluster }()
pveServer := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
defer pveServer.Close()
cfg := &config.Config{
DataPath: tempDir,
ConfigPath: tempDir,
}
handler := newTestConfigHandlers(t, cfg)
const authToken = "TEMP-TOKEN-FALLBACK"
tokenHash := internalauth.HashAPIToken(authToken)
handler.codeMutex.Lock()
handler.setupCodes[tokenHash] = &SetupCode{
ExpiresAt: time.Now().Add(5 * time.Minute),
NodeType: "pve",
}
handler.codeMutex.Unlock()
reqBody := AutoRegisterRequest{
Type: "pve",
Host: "https://127.0.0.1:1",
CandidateHosts: []string{"https://127.0.0.1:1", pveServer.URL},
TokenID: "pulse-monitor@pam!pulse-fallback",
TokenValue: "secret-token",
ServerName: "pve01",
Source: "agent",
AuthToken: authToken,
}
body, err := json.Marshal(reqBody)
if err != nil {
t.Fatalf("failed to marshal request: %v", err)
}
req := httptest.NewRequest(http.MethodPost, "/api/auto-register", bytes.NewReader(body))
rec := httptest.NewRecorder()
handler.HandleAutoRegister(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("registration failed: status=%d, body=%s", rec.Code, rec.Body.String())
}
if len(cfg.PVEInstances) != 1 {
t.Fatalf("expected 1 PVE instance, got %d", len(cfg.PVEInstances))
}
expectedHost, err := normalizeNodeHost(pveServer.URL, "pve")
if err != nil {
t.Fatalf("failed to normalize expected host: %v", err)
}
if cfg.PVEInstances[0].Host != expectedHost {
t.Fatalf("selected host = %q, want reachable fallback %q", cfg.PVEInstances[0].Host, expectedHost)
}
expectedFP, err := tlsutil.FetchFingerprint(pveServer.URL)
if err != nil {
t.Fatalf("failed to fetch expected fingerprint: %v", err)
}
if cfg.PVEInstances[0].Fingerprint != expectedFP {
t.Fatalf("fingerprint = %q, want %q", cfg.PVEInstances[0].Fingerprint, expectedFP)
}
if !cfg.PVEInstances[0].VerifySSL {
t.Fatal("expected verifySSL to be enabled when fallback fingerprint capture succeeds")
}
}

View file

@ -611,69 +611,99 @@ func (p *ProxmoxSetup) parsePBSTokenValue(output string) string {
return ""
}
// getHostURL constructs the host URL for this Proxmox node.
// Uses the local IP that can reach Pulse, falling back to intelligent IP selection.
func (p *ProxmoxSetup) getHostURL(ptype string) string {
func appendUniqueHostCandidate(candidates []string, seen map[string]struct{}, candidate string) []string {
normalized := strings.TrimSpace(candidate)
if normalized == "" {
return candidates
}
if _, exists := seen[normalized]; exists {
return candidates
}
seen[normalized] = struct{}{}
return append(candidates, normalized)
}
func (p *ProxmoxSetup) candidateHostURLs(ptype string) []string {
port := "8006"
if ptype == "pbs" {
port = "8007"
}
// Priority 1: User-specified ReportIP override from configuration.
// This allows users to manually specify which IP should be used for Proxmox API
// connections when auto-detection picks the wrong one (e.g., Issue #1061).
if p.reportIP != "" {
p.logger.Info().Str("ip", p.reportIP).Msg("Using user-specified ReportIP for Proxmox registration")
return fmt.Sprintf("https://%s:%s", p.reportIP, port)
candidates := make([]string, 0, 4)
seen := make(map[string]struct{}, 4)
buildURL := func(host string) string {
if strings.TrimSpace(host) == "" {
return ""
}
return fmt.Sprintf("https://%s:%s", host, port)
}
// Priority 2: Prefer the system hostname when it resolves to a non-loopback address.
// This preserves admin-managed DNS names and matching TLS certificates instead of
// silently downgrading to an inferred IP address, which can pick the wrong interface
// on multi-NIC Proxmox hosts and break internal-CA deployments.
// Respect an explicit operator override as authoritative.
if p.reportIP != "" {
return append(candidates, buildURL(p.reportIP))
}
if p.collector == nil {
hostname := strings.TrimSpace(p.hostname)
if hostname == "" {
hostname = "localhost"
}
return append(candidates, buildURL(hostname))
}
// Keep the hostname candidate first for environments where Pulse can resolve
// the managed DNS name and should preserve the node certificate identity.
if hostname := strings.TrimSpace(p.hostname); hostname != "" {
if hostnameIP := p.getIPForHostname(); hostnameIP != "" && !isLoopbackOrLinkLocalIP(hostnameIP) {
p.logger.Debug().
Str("hostname", hostname).
Str("ip", hostnameIP).
Msg("Using resolvable hostname for Proxmox registration")
return fmt.Sprintf("https://%s:%s", hostname, port)
candidates = appendUniqueHostCandidate(candidates, seen, buildURL(hostname))
}
}
// Priority 3: Try to determine which local IP is used to connect to Pulse.
// This remains a fallback for environments without usable hostname resolution.
// Also provide the exact local IP that can reach Pulse so the server can fall
// back to a proven-routable endpoint when the hostname is not reachable from
// the Pulse host (for example inside a Docker bridge without internal DNS).
if reachableIP := p.getIPThatReachesPulse(); reachableIP != "" {
p.logger.Debug().Str("ip", reachableIP).Msg("Using IP that can reach Pulse server")
return fmt.Sprintf("https://%s:%s", reachableIP, port)
candidates = appendUniqueHostCandidate(candidates, seen, buildURL(reachableIP))
}
// Priority 4: Get all IPs and select the best one based on heuristics.
if out, err := p.collector.CommandCombinedOutput(context.Background(), "hostname", "-I"); err == nil {
ips := strings.Fields(out)
if len(ips) > 0 {
// Get the IP that the system hostname currently resolves to
hostnameIP := p.getIPForHostname()
bestIP := selectBestIP(ips, hostnameIP)
if bestIP != "" {
return fmt.Sprintf("https://%s:%s", bestIP, port)
if bestIP := selectBestIP(ips, hostnameIP); bestIP != "" {
candidates = appendUniqueHostCandidate(candidates, seen, buildURL(bestIP))
}
}
}
// Final fallback to hostname if IP detection failed
hostname := p.hostname
if len(candidates) > 0 {
return candidates
}
hostname := strings.TrimSpace(p.hostname)
if hostname == "" {
hostname = "localhost"
}
return fmt.Sprintf("https://%s:%s", hostname, port)
return append(candidates, buildURL(hostname))
}
// getHostURL constructs the host URL for this Proxmox node.
// Uses the local IP that can reach Pulse, falling back to intelligent IP selection.
func (p *ProxmoxSetup) getHostURL(ptype string) string {
candidates := p.candidateHostURLs(ptype)
if len(candidates) == 0 {
return ""
}
return candidates[0]
}
// getIPThatReachesPulse determines which local IP is used to connect to the Pulse server.
// This handles cases where multiple network interfaces exist (e.g., management, Ceph, cluster ring)
// and ensures we pick the one that can actually reach Pulse. Related to #929.
func (p *ProxmoxSetup) getIPThatReachesPulse() string {
if p.collector == nil {
return ""
}
if p.pulseURL == "" {
return ""
}
@ -726,6 +756,9 @@ func (p *ProxmoxSetup) getIPThatReachesPulse() string {
// getIPForHostname resolves the system hostname to an IP address.
func (p *ProxmoxSetup) getIPForHostname() string {
if p.collector == nil {
return ""
}
hostname := p.hostname
if hostname == "" {
hostname, _ = p.collector.Hostname()
@ -898,13 +931,25 @@ func (p *ProxmoxSetup) doRegisterRequest(ctx context.Context, body []byte) error
// registerWithPulse calls the auto-register endpoint to add the node.
func (p *ProxmoxSetup) registerWithPulse(ctx context.Context, ptype, hostURL, tokenID, tokenValue string) error {
candidateHosts := p.candidateHostURLs(ptype)
if len(candidateHosts) == 0 || candidateHosts[0] != hostURL {
seen := make(map[string]struct{}, len(candidateHosts)+1)
reordered := make([]string, 0, len(candidateHosts)+1)
reordered = appendUniqueHostCandidate(reordered, seen, hostURL)
for _, candidate := range candidateHosts {
reordered = appendUniqueHostCandidate(reordered, seen, candidate)
}
candidateHosts = reordered
}
payload := map[string]interface{}{
"type": ptype,
"host": hostURL,
"serverName": p.hostname,
"tokenId": tokenID,
"tokenValue": tokenValue,
"source": "agent", // Indicates this was registered via agent
"type": ptype,
"host": hostURL,
"candidateHosts": candidateHosts,
"serverName": p.hostname,
"tokenId": tokenID,
"tokenValue": tokenValue,
"source": "agent", // Indicates this was registered via agent
}
if token := strings.TrimSpace(p.apiToken); token != "" {
// Auto-register accepts one-time setup tokens via the JSON body. The

View file

@ -370,6 +370,7 @@ func TestRegisterWithPulseRetry(t *testing.T) {
var gotAuth string
var gotAPIToken string
var gotAuthToken string
var gotCandidateHosts []string
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
gotAuth = r.Header.Get("Authorization")
gotAPIToken = r.Header.Get("X-API-Token")
@ -380,6 +381,14 @@ func TestRegisterWithPulseRetry(t *testing.T) {
if token, _ := payload["authToken"].(string); token != "" {
gotAuthToken = token
}
if rawCandidates, ok := payload["candidateHosts"].([]interface{}); ok {
gotCandidateHosts = gotCandidateHosts[:0]
for _, raw := range rawCandidates {
if candidate, ok := raw.(string); ok {
gotCandidateHosts = append(gotCandidateHosts, candidate)
}
}
}
n := atomic.AddInt32(&attempt, 1)
if n <= 2 {
w.WriteHeader(http.StatusServiceUnavailable)
@ -414,6 +423,12 @@ func TestRegisterWithPulseRetry(t *testing.T) {
if gotAuthToken != "test-token" {
t.Fatalf("authToken payload = %q, want %q", gotAuthToken, "test-token")
}
if len(gotCandidateHosts) == 0 {
t.Fatal("expected candidateHosts payload to be present")
}
if gotCandidateHosts[0] != "https://10.0.0.1:8006" {
t.Fatalf("candidateHosts[0] = %q, want %q", gotCandidateHosts[0], "https://10.0.0.1:8006")
}
}
func TestRegisterWithPulseNoRetryOn4xx(t *testing.T) {