Pulse/cmd/pulse-sensor-proxy/ssh.go
rcourtman 9e339957c6 fix: Update runtime config when toggling Docker update actions setting
The DisableDockerUpdateActions setting was being saved to disk but not
updated in h.config, causing the UI toggle to appear to revert on page
refresh since the API returned the stale runtime value.

Related to #1023
2026-01-03 11:14:17 +00:00

1051 lines
31 KiB
Go

package main
import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"io"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/ssh/knownhosts"
"github.com/rs/zerolog/log"
)
// Variable for testing to mock net.Interfaces
var netInterfaces = net.Interfaces
// Variable for testing to mock exec.LookPath
var execLookPath = exec.LookPath
// Variable for testing to mock os.Hostname
var osHostname = os.Hostname
// Variable for testing to mock exec.Command (for simple output)
var execCommandFunc = exec.Command
const (
tempWrapperPath = "/usr/local/libexec/pulse-sensor-proxy/temp-wrapper.sh"
tempWrapperScript = `#!/bin/sh
set -eu
WRAPPER_PATH="${PULSE_SENSOR_WRAPPER:-/opt/pulse/sensor-proxy/bin/pulse-sensor-wrapper.sh}"
# Prefer the full wrapper when available so SMART disk temperatures are included
if [ -x "$WRAPPER_PATH" ]; then
exec "$WRAPPER_PATH"
fi
# Legacy locations (for older installations)
if [ -x /usr/local/bin/pulse-sensor-wrapper.sh ]; then
exec /usr/local/bin/pulse-sensor-wrapper.sh
fi
# Fallback to basic lm-sensors output (no SMART data)
if command -v sensors >/dev/null 2>&1; then
OUTPUT="$(sensors -j 2>/dev/null || true)"
if [ -n "$OUTPUT" ]; then
printf '%s\n' "$OUTPUT"
exit 0
fi
fi
if [ -r /sys/class/thermal/thermal_zone0/temp ]; then
RAW="$(cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null || true)"
if [ -n "$RAW" ]; then
TEMP="$(awk -v raw="$RAW" 'BEGIN { if (raw == "") exit 1; printf "%.2f", raw / 1000.0 }' 2>/dev/null || true)"
if [ -n "$TEMP" ]; then
printf '{"rpitemp-virtual":{"temp1":{"temp1_input":%s}}}\n' "$TEMP"
exit 0
fi
fi
fi
exit 1
`
)
const proxmoxClusterKnownHostsPath = "/etc/pve/priv/known_hosts"
// execCommand executes a shell command and returns output
func execCommand(cmd string) (string, error) {
out, err := exec.Command("sh", "-c", cmd).CombinedOutput()
return string(out), err
}
// execCommandWithLimitsContext runs a shell command with output limits and context cancellation
func execCommandWithLimitsContext(ctx context.Context, cmd string, stdoutLimit, stderrLimit int64) (string, string, bool, bool, error) {
command := exec.CommandContext(ctx, "sh", "-c", cmd)
stdoutPipe, err := command.StdoutPipe()
if err != nil {
return "", "", false, false, fmt.Errorf("stdout pipe: %w", err)
}
stderrPipe, err := command.StderrPipe()
if err != nil {
return "", "", false, false, fmt.Errorf("stderr pipe: %w", err)
}
if err := command.Start(); err != nil {
return "", "", false, false, err
}
type pipeResult struct {
data []byte
exceeded bool
err error
}
var wg sync.WaitGroup
wg.Add(2)
stdoutCh := make(chan pipeResult, 1)
stderrCh := make(chan pipeResult, 1)
go func() {
defer wg.Done()
data, exceeded, readErr := readAllWithLimit(stdoutPipe, stdoutLimit)
stdoutCh <- pipeResult{data: data, exceeded: exceeded, err: readErr}
}()
go func() {
defer wg.Done()
data, exceeded, readErr := readAllWithLimit(stderrPipe, stderrLimit)
stderrCh <- pipeResult{data: data, exceeded: exceeded, err: readErr}
}()
var stdoutRes, stderrRes pipeResult
wgDone := make(chan struct{})
go func() {
wg.Wait()
close(wgDone)
}()
<-wgDone
stdoutRes = <-stdoutCh
stderrRes = <-stderrCh
waitErr := command.Wait()
if stdoutRes.err != nil {
return "", "", stdoutRes.exceeded, stderrRes.exceeded, fmt.Errorf("stdout read: %w", stdoutRes.err)
}
if stderrRes.err != nil {
return "", "", stdoutRes.exceeded, stderrRes.exceeded, fmt.Errorf("stderr read: %w", stderrRes.err)
}
if waitErr != nil {
return string(stdoutRes.data), string(stderrRes.data), stdoutRes.exceeded, stderrRes.exceeded, waitErr
}
return string(stdoutRes.data), string(stderrRes.data), stdoutRes.exceeded, stderrRes.exceeded, nil
}
func execCommandWithLimits(cmd string, stdoutLimit, stderrLimit int64) (string, string, bool, bool, error) {
command := exec.Command("sh", "-c", cmd)
stdoutPipe, err := command.StdoutPipe()
if err != nil {
return "", "", false, false, fmt.Errorf("stdout pipe: %w", err)
}
stderrPipe, err := command.StderrPipe()
if err != nil {
return "", "", false, false, fmt.Errorf("stderr pipe: %w", err)
}
if err := command.Start(); err != nil {
return "", "", false, false, err
}
type pipeResult struct {
data []byte
exceeded bool
err error
}
var wg sync.WaitGroup
wg.Add(2)
stdoutCh := make(chan pipeResult, 1)
stderrCh := make(chan pipeResult, 1)
go func() {
defer wg.Done()
data, exceeded, readErr := readAllWithLimit(stdoutPipe, stdoutLimit)
stdoutCh <- pipeResult{data: data, exceeded: exceeded, err: readErr}
}()
go func() {
defer wg.Done()
data, exceeded, readErr := readAllWithLimit(stderrPipe, stderrLimit)
stderrCh <- pipeResult{data: data, exceeded: exceeded, err: readErr}
}()
var stdoutRes, stderrRes pipeResult
wgDone := make(chan struct{})
go func() {
wg.Wait()
close(wgDone)
}()
<-wgDone
stdoutRes = <-stdoutCh
stderrRes = <-stderrCh
waitErr := command.Wait()
if stdoutRes.err != nil {
return "", "", stdoutRes.exceeded, stderrRes.exceeded, fmt.Errorf("stdout read: %w", stdoutRes.err)
}
if stderrRes.err != nil {
return "", "", stdoutRes.exceeded, stderrRes.exceeded, fmt.Errorf("stderr read: %w", stderrRes.err)
}
if waitErr != nil {
return string(stdoutRes.data), string(stderrRes.data), stdoutRes.exceeded, stderrRes.exceeded, waitErr
}
return string(stdoutRes.data), string(stderrRes.data), stdoutRes.exceeded, stderrRes.exceeded, nil
}
func readAllWithLimit(r io.Reader, limit int64) ([]byte, bool, error) {
if limit <= 0 {
data, err := io.ReadAll(r)
return data, false, err
}
const chunkSize = 32 * 1024
buf := make([]byte, chunkSize)
var out bytes.Buffer
var total int64
exceeded := false
for {
n, err := r.Read(buf)
if n > 0 {
if total < limit {
remaining := limit - total
toWrite := n
if int64(n) > remaining {
toWrite = int(remaining)
exceeded = true
}
if toWrite > 0 {
if _, writeErr := out.Write(buf[:toWrite]); writeErr != nil {
return nil, exceeded, writeErr
}
}
if int64(n) > remaining {
exceeded = true
}
} else {
exceeded = true
}
total += int64(n)
}
if err != nil {
if err == io.EOF {
break
}
return nil, exceeded, err
}
}
return out.Bytes(), exceeded, nil
}
func (p *Proxy) ensureHostKeyFromProxmox(ctx context.Context, node string) error {
if !isProxmoxHost() {
return fmt.Errorf("not running on Proxmox host")
}
entries, err := loadProxmoxHostKeys(node)
if err != nil {
return err
}
if err := p.knownHosts.EnsureWithEntries(ctx, node, 22, entries); err != nil {
return p.handleHostKeyEnsureError(node, err)
}
log.Debug().
Str("node", node).
Msg("Loaded host key from Proxmox cluster store")
return nil
}
func (p *Proxy) handleHostKeyEnsureError(node string, err error) error {
var changeErr *knownhosts.HostKeyChangeError
if errors.As(err, &changeErr) {
log.Error().
Str("node", node).
Str("host_spec", changeErr.Host).
Msg("Detected SSH host key change")
if p.metrics != nil {
p.metrics.recordHostKeyChange(node)
}
}
return err
}
func loadProxmoxHostKeys(host string) ([][]byte, error) {
file, err := os.Open(proxmoxClusterKnownHostsPath)
if err != nil {
return nil, err
}
defer file.Close()
var entries [][]byte
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" || strings.HasPrefix(line, "#") {
continue
}
fields := strings.Fields(line)
if len(fields) < 3 {
continue
}
if !knownhosts.HostFieldMatches(host, fields[0]) {
continue
}
comment := ""
if len(fields) > 3 {
comment = strings.Join(fields[3:], " ")
}
var entry string
if comment != "" {
entry = fmt.Sprintf("%s %s %s %s", host, fields[1], fields[2], comment)
} else {
entry = fmt.Sprintf("%s %s %s", host, fields[1], fields[2])
}
entries = append(entries, []byte(entry))
}
if err := scanner.Err(); err != nil {
return nil, err
}
if len(entries) == 0 {
return nil, fmt.Errorf("no Proxmox host keys found for %s in %s", host, proxmoxClusterKnownHostsPath)
}
return entries, nil
}
// getPublicKeyFrom reads the SSH public key from a specific directory
func (p *Proxy) getPublicKeyFrom(keyDir string) (string, error) {
pubKeyPath := filepath.Join(keyDir, "id_ed25519.pub")
data, err := os.ReadFile(pubKeyPath)
if err != nil {
return "", err
}
return strings.TrimSpace(string(data)), nil
}
// buildAuthorizedKey constructs an authorized_keys entry with from= IP restrictions
func (p *Proxy) buildAuthorizedKey(pubKey string) (string, error) {
subnets := p.config.AllowedSourceSubnets
if len(subnets) == 0 {
return "", fmt.Errorf("no allowed source subnets configured or detected")
}
// Build from= clause with all allowed subnets
fromClause := fmt.Sprintf(`from="%s"`, strings.Join(subnets, ","))
// Comment helps identify and upgrade this key later
const comment = "pulse-sensor-proxy"
// Forced command with all restrictions
forced := fmt.Sprintf(`command="%s",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty`, tempWrapperPath)
// Format: from="...",command="...",no-* ssh-rsa AAAA... pulse-sensor-proxy
return fmt.Sprintf(`%s,%s %s %s`, fromClause, forced, pubKey, comment), nil
}
func (p *Proxy) ensureHostKey(node string) error {
if p.knownHosts == nil {
return fmt.Errorf("host key manager not configured")
}
ctx := context.Background()
if isProxmoxHost() {
if err := p.ensureHostKeyFromProxmox(ctx, node); err == nil {
return nil
} else {
if p.config.RequireProxmoxHostkeys {
return err
}
log.Warn().
Str("node", node).
Err(err).
Msg("Failed to load host key from Proxmox; falling back to ssh-keyscan")
}
} else if p.config.RequireProxmoxHostkeys {
return fmt.Errorf("require_proxmox_hostkeys enabled but not running on Proxmox host")
}
if err := p.knownHosts.Ensure(ctx, node); err != nil {
return p.handleHostKeyEnsureError(node, err)
}
return nil
}
func (p *Proxy) sshCommonOptions() string {
if p.knownHosts == nil {
return "-o StrictHostKeyChecking=yes -o BatchMode=yes"
}
return fmt.Sprintf("-o StrictHostKeyChecking=yes -o BatchMode=yes -o UserKnownHostsFile=%s -o GlobalKnownHostsFile=/dev/null",
shellQuote(p.knownHosts.Path()))
}
func shellQuote(arg string) string {
if arg == "" {
return "''"
}
if !strings.Contains(arg, "'") {
return "'" + arg + "'"
}
return strconv.Quote(arg)
}
func (p *Proxy) ensureTempWrapper(nodeHost, commonOpts string) error {
dir := filepath.Dir(tempWrapperPath)
mkdirCmd := fmt.Sprintf(
`ssh %s -o ConnectTimeout=10 root@%s "mkdir -p %s && chmod 755 %s"`,
commonOpts,
nodeHost,
dir,
dir,
)
if _, err := execCommand(mkdirCmd); err != nil {
return fmt.Errorf("failed to prepare temperature wrapper directory on %s: %w", nodeHost, err)
}
uploadCmd := fmt.Sprintf(
`ssh %s -o ConnectTimeout=10 root@%s "cat > %s <<'EOF'
%s
EOF
chmod 755 %s"`,
commonOpts,
nodeHost,
tempWrapperPath,
tempWrapperScript,
tempWrapperPath,
)
if _, err := execCommand(uploadCmd); err != nil {
return fmt.Errorf("failed to install temperature wrapper on %s: %w", nodeHost, err)
}
return nil
}
// pushSSHKeyFrom pushes a public key from a specific directory to a node
func (p *Proxy) pushSSHKeyFrom(nodeHost, keyDir string) error {
startTime := time.Now()
nodeLabel := sanitizeNodeLabel(nodeHost)
pubKey, err := p.getPublicKeyFrom(keyDir)
if err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return fmt.Errorf("failed to get public key from %s: %w", keyDir, err)
}
// Build the restricted authorized_keys entry
entry, err := p.buildAuthorizedKey(pubKey)
if err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return fmt.Errorf("failed to build authorized key: %w", err)
}
if err := p.ensureHostKey(nodeHost); err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return fmt.Errorf("failed to ensure host key for %s: %w", nodeHost, err)
}
commonOpts := p.sshCommonOptions()
if err := p.ensureTempWrapper(nodeHost, commonOpts); err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return fmt.Errorf("failed to stage temperature wrapper on %s: %w", nodeHost, err)
}
// Check if the exact restricted entry already exists
checkCmd := fmt.Sprintf(
`ssh %s -o ConnectTimeout=10 root@%s "grep -F '%s' /root/.ssh/authorized_keys 2>/dev/null"`,
commonOpts,
nodeHost,
entry,
)
if output, _ := execCommand(checkCmd); strings.Contains(output, entry) {
log.Debug().Str("node", nodeHost).Msg("SSH key already present with from= restrictions")
p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return nil
}
// Remove old pulse-temp-proxy and pulse-sensor-proxy entries (for upgrade path)
removeOldCmd := fmt.Sprintf(
`ssh %s -o ConnectTimeout=10 root@%s "mkdir -p /root/.ssh && chmod 700 /root/.ssh && grep -v -e 'pulse-temp-proxy$' -e 'pulse-sensor-proxy$' /root/.ssh/authorized_keys > /root/.ssh/authorized_keys.tmp 2>/dev/null || touch /root/.ssh/authorized_keys.tmp"`,
commonOpts,
nodeHost,
)
if _, err := execCommand(removeOldCmd); err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return fmt.Errorf("failed to prepare authorized_keys on %s: %w", nodeHost, err)
}
// Add the new restricted key and atomically replace the file
addCmd := fmt.Sprintf(
`ssh %s -o ConnectTimeout=10 root@%s "echo '%s' >> /root/.ssh/authorized_keys.tmp && mv /root/.ssh/authorized_keys.tmp /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"`,
commonOpts,
nodeHost,
entry,
)
if _, err := execCommand(addCmd); err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return fmt.Errorf("failed to add SSH key to %s: %w", nodeHost, err)
}
log.Info().
Str("node", nodeHost).
Str("key_dir", keyDir).
Strs("allowed_subnets", p.config.AllowedSourceSubnets).
Msg("SSH key installed with from= IP restrictions")
p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return nil
}
// testSSHConnection verifies SSH connectivity to a node
func (p *Proxy) testSSHConnection(nodeHost string) error {
startTime := time.Now()
nodeLabel := sanitizeNodeLabel(nodeHost)
privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
if err := p.ensureHostKey(nodeHost); err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return fmt.Errorf("failed to ensure host key for %s: %w", nodeHost, err)
}
commonOpts := p.sshCommonOptions()
cmd := fmt.Sprintf(
`ssh %s -i %s -T -n -o LogLevel=ERROR -o ConnectTimeout=5 root@%s ""`,
commonOpts,
shellQuote(privKeyPath),
nodeHost,
)
_, stderr, stdoutExceeded, stderrExceeded, err := execCommandWithLimits(cmd, p.maxSSHOutputBytes, p.maxSSHOutputBytes)
if stdoutExceeded {
log.Warn().Str("node", nodeHost).Int64("limit_bytes", p.maxSSHOutputBytes).Msg("SSH test output exceeded limit")
if p.metrics != nil {
p.metrics.recordSSHOutputOversized(nodeHost)
}
return fmt.Errorf("ssh test output exceeded %d bytes", p.maxSSHOutputBytes)
}
if stderrExceeded {
log.Warn().Str("node", nodeHost).Int64("limit_bytes", p.maxSSHOutputBytes).Msg("SSH test stderr exceeded limit")
}
if err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
stderrMsg := strings.TrimSpace(stderr)
if stderrMsg != "" {
return fmt.Errorf("SSH test failed: %w (stderr: %s)", err, stderrMsg)
}
return fmt.Errorf("SSH test failed: %w", err)
}
// The forced command will run "sensors -j" instead of "echo test"
// So we should get JSON output, not "test"
// For now, just check that connection succeeded
p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return nil
}
// getTemperatureViaSSH fetches temperature data from a node
func (p *Proxy) getTemperatureViaSSH(ctx context.Context, nodeHost string) (string, error) {
startTime := time.Now()
nodeLabel := sanitizeNodeLabel(nodeHost)
localNode := isLocalNode(nodeHost)
if localNode {
log.Debug().Str("node", nodeHost).Msg("Self-monitoring detected, using SSH wrapper for SMART temperatures")
}
privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
if err := p.ensureHostKey(nodeHost); err != nil {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return "", fmt.Errorf("failed to ensure host key for %s: %w", nodeHost, err)
}
commonOpts := p.sshCommonOptions()
// Since we use a forced wrapper command, any SSH connection runs the wrapper
// We don't need to specify the command
cmd := fmt.Sprintf(
`ssh %s -i %s -T -n -o LogLevel=ERROR -o ConnectTimeout=5 root@%s ""`,
commonOpts,
shellQuote(privKeyPath),
nodeHost,
)
stdout, stderr, stdoutExceeded, stderrExceeded, err := execCommandWithLimitsContext(ctx, cmd, p.maxSSHOutputBytes, p.maxSSHOutputBytes)
if stdoutExceeded {
log.Warn().Str("node", nodeHost).Int64("limit_bytes", p.maxSSHOutputBytes).Msg("SSH temperature output exceeded limit")
if p.metrics != nil {
p.metrics.recordSSHOutputOversized(nodeHost)
}
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return "", fmt.Errorf("ssh output exceeded %d bytes", p.maxSSHOutputBytes)
}
if stderrExceeded {
log.Warn().Str("node", nodeHost).Int64("limit_bytes", p.maxSSHOutputBytes).Msg("SSH temperature stderr exceeded limit")
}
if err != nil {
if localNode {
log.Warn().
Str("node", nodeHost).
Err(err).
Msg("SSH temperature collection failed on local node, falling back to direct sensors")
if fallback, localErr := p.getTemperatureLocal(ctx); localErr == nil && strings.TrimSpace(fallback) != "" {
p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return fallback, nil
}
}
p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
stderrMsg := strings.TrimSpace(stderr)
if stderrMsg != "" {
return "", fmt.Errorf("failed to fetch temperatures: %w (stderr: %s)", err, stderrMsg)
}
return "", fmt.Errorf("failed to fetch temperatures: %w", err)
}
p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
return stdout, nil
}
// discoverClusterNodes discovers all nodes in the Proxmox cluster
// Returns IP addresses of cluster nodes
// For standalone nodes (no cluster), returns the host's own addresses
func discoverClusterNodes() ([]string, error) {
// Check if pvecm is available (only on Proxmox hosts)
if _, err := exec.LookPath("pvecm"); err != nil {
return nil, fmt.Errorf("pvecm not found - not running on Proxmox host")
}
// Get cluster status with IP addresses
cmd := exec.Command("pvecm", "status")
var out, stderr bytes.Buffer
cmd.Stdout = &out
cmd.Stderr = &stderr
err := cmd.Run()
// pvecm status exits with code 2 or 255 on standalone nodes (not in a cluster)
// Also handle LXC containers where pvecm can't access corosync IPC
// Treat these as valid cases and discover local host addresses
if err != nil {
stderrStr := stderr.String()
stdoutStr := out.String()
combinedOutput := stderrStr + stdoutStr
// First check for IPC/permission errors - these indicate a cluster exists but we can't access it
// These should NOT be treated as standalone mode
ipcErrorIndicators := []string{
"ipcc_send_rec",
"Unable to load access control list",
"access control list",
}
for _, indicator := range ipcErrorIndicators {
if strings.Contains(combinedOutput, indicator) {
log.Warn().
Str("stderr", stderrStr).
Msg("Cannot access Proxmox cluster IPC - cluster validation disabled. Add nodes to allowed_nodes in config if needed.")
// Return error to disable cluster validation rather than falling back to incorrect standalone mode
return nil, fmt.Errorf("pvecm cluster IPC access denied (check systemd restrictions or run outside container): %s", stderrStr)
}
}
// Now check for true standalone/no-cluster patterns
// These indicate the node genuinely isn't part of a cluster
standaloneIndicators := []string{
// Configuration missing
"does not exist", "not found", "no such file",
// Cluster state
"not part of a cluster", "no cluster", "standalone",
}
isStandalone := false
for _, indicator := range standaloneIndicators {
if strings.Contains(strings.ToLower(combinedOutput), strings.ToLower(indicator)) {
isStandalone = true
break
}
}
if isStandalone {
// Log at INFO level since this is expected for standalone scenarios
log.Info().
Str("exit_code", fmt.Sprintf("%v", err)).
Msg("Standalone Proxmox node detected - discovering local host addresses for validation")
return discoverLocalHostAddresses()
}
// For truly unexpected errors (rare), fail with full context for debugging
log.Warn().
Err(err).
Str("stderr", stderrStr).
Str("stdout", stdoutStr).
Msg("pvecm status failed with unexpected error - please report this if temperature monitoring doesn't work")
return nil, fmt.Errorf("failed to get cluster status: %w (stderr: %s, stdout: %s)", err, stderrStr, stdoutStr)
}
// Parse output to extract IP addresses
return parseClusterNodes(out.String())
}
// parseClusterNodes parses pvecm status output to extract IP addresses
func parseClusterNodes(output string) ([]string, error) {
var nodes []string
lines := strings.Split(output, "\n")
for _, line := range lines {
if strings.Contains(strings.ToLower(line), "qdevice") {
continue
}
fields := strings.Fields(line)
// Need at least 2 fields to be a valid node line (id + ip/name)
if len(fields) < 2 {
continue
}
// Iterate through fields to find the IP address
for _, field := range fields {
// Check if it's a valid IP
if ip := net.ParseIP(field); ip != nil {
nodes = append(nodes, field)
break
}
}
}
if len(nodes) == 0 {
return nil, fmt.Errorf("no cluster nodes found with IP addresses")
}
return nodes, nil
}
// discoverLocalHostAddresses discovers all addresses of the local host
// Used for standalone nodes that aren't part of a cluster
func discoverLocalHostAddresses() ([]string, error) {
addresses := make(map[string]struct{})
// Get hostname and FQDN
if hostname, err := os.Hostname(); err == nil && hostname != "" {
addresses[strings.ToLower(hostname)] = struct{}{}
// Try to get FQDN
cmd := exec.Command("hostname", "-f")
if out, err := cmd.Output(); err == nil {
fqdn := strings.TrimSpace(string(out))
if fqdn != "" && fqdn != hostname {
addresses[strings.ToLower(fqdn)] = struct{}{}
}
}
}
// This is more reliable than shelling out to 'ip addr' and works even with strict systemd restrictions
ipCount := 0
interfaces, err := netInterfaces()
if err != nil {
// Check if this is an AF_NETLINK restriction error from systemd
if strings.Contains(err.Error(), "netlinkrib") || strings.Contains(err.Error(), "address family not supported") {
log.Warn().
Err(err).
Msg("AF_NETLINK restricted by systemd - falling back to 'ip addr' command. Update systemd unit to add AF_NETLINK to RestrictAddressFamilies.")
return discoverLocalHostAddressesFallback()
}
log.Warn().
Err(err).
Msg("Failed to enumerate network interfaces - temperature monitoring may require manual allowed_nodes configuration")
} else {
for _, iface := range interfaces {
// Skip loopback interfaces
if iface.Flags&net.FlagLoopback != 0 {
continue
}
// Skip interfaces that are down
if iface.Flags&net.FlagUp == 0 {
continue
}
addrs, err := iface.Addrs()
if err != nil {
log.Debug().
Err(err).
Str("interface", iface.Name).
Msg("Failed to get addresses for interface")
continue
}
for _, addr := range addrs {
var ip net.IP
switch v := addr.(type) {
case *net.IPNet:
ip = v.IP
case *net.IPAddr:
ip = v.IP
default:
continue
}
// Skip loopback addresses
if ip.IsLoopback() {
continue
}
// Skip link-local IPv6 addresses
if ip.IsLinkLocalUnicast() {
continue
}
// Skip unspecified addresses
if ip.IsUnspecified() {
continue
}
addresses[ip.String()] = struct{}{}
ipCount++
}
}
}
// Convert map to slice
result := make([]string, 0, len(addresses))
for addr := range addresses {
result = append(result, addr)
}
if len(result) == 0 {
return nil, fmt.Errorf("no local host addresses found")
}
// Log helpful info about discovered addresses
logger := log.Info().
Strs("addresses", result).
Int("ip_count", ipCount).
Int("hostname_count", len(result)-ipCount)
if ipCount == 0 {
logger.Msg("WARNING: No IP addresses discovered for standalone node - only hostnames available. If temperature monitoring fails with 'node_not_cluster_member' errors, add the node's IP to allowed_nodes in /etc/pulse-sensor-proxy/config.yaml")
} else {
logger.Msg("Discovered local host addresses for standalone node validation")
}
return result, nil
}
// discoverLocalHostAddressesFallback uses 'ip addr' command when AF_NETLINK is restricted
func discoverLocalHostAddressesFallback() ([]string, error) {
addresses := make(map[string]struct{})
// Get hostname and FQDN (same as native version)
if hostname, err := os.Hostname(); err == nil && hostname != "" {
addresses[strings.ToLower(hostname)] = struct{}{}
cmd := exec.Command("hostname", "-f")
if out, err := cmd.Output(); err == nil {
fqdn := strings.TrimSpace(string(out))
if fqdn != "" && fqdn != hostname {
addresses[strings.ToLower(fqdn)] = struct{}{}
}
}
}
// Use 'ip addr' to get IP addresses
cmd := exec.Command("ip", "addr", "show")
out, err := cmd.Output()
if err != nil {
log.Warn().Err(err).Msg("Failed to run 'ip addr' command")
// Return at least the hostname
result := make([]string, 0, len(addresses))
for addr := range addresses {
result = append(result, addr)
}
return result, nil
}
// Parse 'ip addr' output for inet/inet6 lines
// Example: " inet 192.168.0.5/24 brd 192.168.0.255 scope global eno1"
ipCount := 0
lines := strings.Split(string(out), "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
if !strings.HasPrefix(line, "inet ") && !strings.HasPrefix(line, "inet6 ") {
continue
}
fields := strings.Fields(line)
if len(fields) < 2 {
continue
}
// Second field is the IP/CIDR (e.g., "192.168.0.5/24")
ipCIDR := fields[1]
ipStr, _, _ := strings.Cut(ipCIDR, "/")
ip := net.ParseIP(ipStr)
if ip == nil || ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsUnspecified() {
continue
}
addresses[ip.String()] = struct{}{}
ipCount++
}
result := make([]string, 0, len(addresses))
for addr := range addresses {
result = append(result, addr)
}
if len(result) == 0 {
return nil, fmt.Errorf("no local host addresses found")
}
// Log helpful info about discovered addresses
logger := log.Info().
Strs("addresses", result).
Int("ip_count", ipCount).
Int("hostname_count", len(result)-ipCount)
if ipCount == 0 {
logger.Msg("WARNING: No IP addresses discovered via 'ip addr' fallback - only hostnames available. Temperature monitoring may require manual allowed_nodes configuration.")
} else {
logger.Msg("Discovered local host addresses via 'ip addr' fallback (systemd unit needs AF_NETLINK update)")
}
return result, nil
}
// isProxmoxHost checks if we're running on a Proxmox host
func isProxmoxHost() bool {
func isProxmoxHost() bool {
// Check for pvecm command
if _, err := execLookPath("pvecm"); err == nil {
return true
}
// Check for /etc/pve directory
if info, err := os.Stat("/etc/pve"); err == nil && info.IsDir() {
return true
}
return false
}
// isLocalNode checks if the requested node is the local machine
func isLocalNode(nodeHost string) bool {
// Get local hostname (short)
hostname, err := osHostname()
if err == nil {
// Match short hostname
if strings.EqualFold(nodeHost, hostname) {
return true
}
// Match FQDN if nodeHost contains dots
if strings.Contains(nodeHost, ".") {
cmd := execCommandFunc("hostname", "-f")
if output, err := cmd.Output(); err == nil {
fqdn := strings.TrimSpace(string(output))
if strings.EqualFold(nodeHost, fqdn) {
return true
}
}
}
}
// Check if nodeHost is a local IP address
ifaces, err := netInterfaces()
if err != nil {
return false
}
for _, iface := range ifaces {
addrs, err := iface.Addrs()
if err != nil {
continue
}
for _, addr := range addrs {
var ip net.IP
switch v := addr.(type) {
case *net.IPNet:
ip = v.IP
case *net.IPAddr:
ip = v.IP
}
if ip != nil && ip.String() == nodeHost {
return true
}
}
}
// Check special cases
if nodeHost == "localhost" || nodeHost == "127.0.0.1" || nodeHost == "::1" {
return true
}
return false
}
// getTemperatureLocal collects temperature data from the local machine
func (p *Proxy) getTemperatureLocal(ctx context.Context) (string, error) {
// Run the same command that the wrapper script runs with context timeout
cmd := exec.CommandContext(ctx, "sensors", "-j")
output, err := cmd.Output()
if err != nil {
// Try without -j flag as fallback
cmd = exec.CommandContext(ctx, "sensors")
if _, err = cmd.Output(); err != nil {
return "", fmt.Errorf("failed to run sensors: %w", err)
}
// Return empty JSON object for non-JSON output
return "{}", nil
}
result := strings.TrimSpace(string(output))
if result == "" {
return "{}", nil
}
return result, nil
}