diff --git a/.gitignore b/.gitignore
index c49e4ab0b..b51fb3eb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -145,4 +145,4 @@ cloud-relay/
scripts/agent/
docs/internal/
claude.md
-pulse-temp-proxy
+/pulse-sensor-proxy
diff --git a/cmd/pulse-temp-proxy/auth.go b/cmd/pulse-sensor-proxy/auth.go
similarity index 51%
rename from cmd/pulse-temp-proxy/auth.go
rename to cmd/pulse-sensor-proxy/auth.go
index 0cc4c914f..64ccedadd 100644
--- a/cmd/pulse-temp-proxy/auth.go
+++ b/cmd/pulse-sensor-proxy/auth.go
@@ -8,18 +8,25 @@ import (
"github.com/rs/zerolog/log"
)
-// verifyPeerCredentials checks if the connecting process is authorized
-// Returns nil if authorized, error otherwise
-func verifyPeerCredentials(conn net.Conn) error {
+// peerCredentials holds extracted credentials from SO_PEERCRED
+type peerCredentials struct {
+ uid uint32
+ pid uint32
+ gid uint32
+}
+
+// extractPeerCredentials extracts and verifies peer credentials
+// Returns credentials if authorized, error otherwise
+func extractPeerCredentials(conn net.Conn) (*peerCredentials, error) {
// Get the underlying file descriptor
unixConn, ok := conn.(*net.UnixConn)
if !ok {
- return fmt.Errorf("not a unix connection")
+ return nil, fmt.Errorf("not a unix connection")
}
file, err := unixConn.File()
if err != nil {
- return fmt.Errorf("failed to get file descriptor: %w", err)
+ return nil, fmt.Errorf("failed to get file descriptor: %w", err)
}
defer file.Close()
@@ -28,7 +35,7 @@ func verifyPeerCredentials(conn net.Conn) error {
// Get peer credentials using SO_PEERCRED
cred, err := syscall.GetsockoptUcred(fd, syscall.SOL_SOCKET, syscall.SO_PEERCRED)
if err != nil {
- return fmt.Errorf("failed to get peer credentials: %w", err)
+ return nil, fmt.Errorf("failed to get peer credentials: %w", err)
}
log.Debug().
@@ -39,14 +46,29 @@ func verifyPeerCredentials(conn net.Conn) error {
// Allow root (UID 0) - this covers most service scenarios
if cred.Uid == 0 {
- return nil
+ return &peerCredentials{
+ uid: cred.Uid,
+ pid: uint32(cred.Pid),
+ gid: cred.Gid,
+ }, nil
}
// Allow the proxy's own user (for testing/debugging)
if cred.Uid == uint32(syscall.Getuid()) {
- return nil
+ return &peerCredentials{
+ uid: cred.Uid,
+ pid: uint32(cred.Pid),
+ gid: cred.Gid,
+ }, nil
}
// Reject all other users
- return fmt.Errorf("unauthorized: uid=%d gid=%d", cred.Uid, cred.Gid)
+ return nil, fmt.Errorf("unauthorized: uid=%d gid=%d", cred.Uid, cred.Gid)
+}
+
+// verifyPeerCredentials checks if the connecting process is authorized (legacy function)
+// Returns nil if authorized, error otherwise
+func verifyPeerCredentials(conn net.Conn) error {
+ _, err := extractPeerCredentials(conn)
+ return err
}
diff --git a/cmd/pulse-sensor-proxy/config.go b/cmd/pulse-sensor-proxy/config.go
new file mode 100644
index 000000000..ddb678a49
--- /dev/null
+++ b/cmd/pulse-sensor-proxy/config.go
@@ -0,0 +1,172 @@
+package main
+
+import (
+ "fmt"
+ "net"
+ "os"
+ "strings"
+
+ "github.com/rs/zerolog/log"
+ "gopkg.in/yaml.v3"
+)
+
+// Config holds proxy configuration
+type Config struct {
+	// AllowedSourceSubnets lists CIDR blocks (or bare IPs, normalized to
+	// /32 or /128 by parseAllowedSubnets) used in the authorized_keys
+	// from= restriction built by buildAuthorizedKey.
+	AllowedSourceSubnets []string `yaml:"allowed_source_subnets"`
+	// MetricsAddress is the Prometheus listen address; "disabled" turns
+	// metrics off and "default" resolves to defaultMetricsAddr (see
+	// ProxyMetrics.Start).
+	MetricsAddress string `yaml:"metrics_address"`
+}
+
+// loadConfig loads configuration from file and environment variables
+func loadConfig(configPath string) (*Config, error) {
+ cfg := &Config{}
+
+ // Try to load config file if it exists
+ if configPath != "" {
+ if _, err := os.Stat(configPath); err == nil {
+ data, err := os.ReadFile(configPath)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read config file: %w", err)
+ }
+
+ if err := yaml.Unmarshal(data, cfg); err != nil {
+ return nil, fmt.Errorf("failed to parse config file: %w", err)
+ }
+
+ log.Info().
+ Str("config_file", configPath).
+ Int("subnet_count", len(cfg.AllowedSourceSubnets)).
+ Msg("Loaded configuration from file")
+ }
+ }
+
+ // Append from environment variable if set
+ if envSubnets := os.Getenv("PULSE_SENSOR_PROXY_ALLOWED_SUBNETS"); envSubnets != "" {
+ envList := strings.Split(envSubnets, ",")
+ cfg.AllowedSourceSubnets = append(cfg.AllowedSourceSubnets, envList...)
+ log.Info().
+ Int("env_subnet_count", len(envList)).
+ Msg("Appended subnets from environment variable")
+ }
+
+ // Metrics address from environment variable
+ if envMetrics := os.Getenv("PULSE_SENSOR_PROXY_METRICS_ADDR"); envMetrics != "" {
+ cfg.MetricsAddress = envMetrics
+ log.Info().Str("metrics_addr", envMetrics).Msg("Metrics address set from environment")
+ }
+
+ // Default metrics address if not configured
+ if cfg.MetricsAddress == "" {
+ cfg.MetricsAddress = "default" // Will use defaultMetricsAddr
+ }
+
+ // Parse and validate all subnets
+ if len(cfg.AllowedSourceSubnets) > 0 {
+ normalized, err := parseAllowedSubnets(cfg.AllowedSourceSubnets)
+ if err != nil {
+ return nil, fmt.Errorf("invalid subnet configuration: %w", err)
+ }
+ cfg.AllowedSourceSubnets = normalized
+ log.Info().
+ Strs("allowed_subnets", cfg.AllowedSourceSubnets).
+ Msg("Validated and normalized subnet configuration")
+ } else {
+ // Auto-detect if no configuration provided
+ detected := detectHostCIDRs()
+ if len(detected) == 0 {
+ log.Warn().Msg("No allowed_source_subnets configured and no host addresses detected")
+ } else {
+ cfg.AllowedSourceSubnets = detected
+ log.Warn().
+ Strs("auto_detected_subnets", detected).
+ Msg("No allowed_source_subnets configured; using detected host addresses (recommended to configure explicitly)")
+ }
+ }
+
+ return cfg, nil
+}
+
+// detectHostCIDRs detects local host IP addresses as /32 (IPv4) or /128 (IPv6) CIDRs
+func detectHostCIDRs() []string {
+ var cidrs []string
+
+ ifaces, err := net.Interfaces()
+ if err != nil {
+ log.Warn().Err(err).Msg("Failed to enumerate network interfaces")
+ return cidrs
+ }
+
+ for _, iface := range ifaces {
+ // Skip down or loopback interfaces
+ if iface.Flags&net.FlagUp == 0 || iface.Flags&net.FlagLoopback != 0 {
+ continue
+ }
+
+ addrs, err := iface.Addrs()
+ if err != nil {
+ log.Warn().Str("iface", iface.Name).Err(err).Msg("Address lookup failed")
+ continue
+ }
+
+ for _, addr := range addrs {
+ ipNet, ok := addr.(*net.IPNet)
+ if !ok {
+ continue
+ }
+
+ ip := ipNet.IP
+ // Skip loopback and link-local addresses
+ if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
+ continue
+ }
+
+ // Add as /32 for IPv4, /128 for IPv6
+ if ip.To4() != nil {
+ cidrs = append(cidrs, ip.String()+"/32")
+ } else if ip.To16() != nil {
+ cidrs = append(cidrs, ip.String()+"/128")
+ }
+ }
+ }
+
+ return cidrs
+}
+
// isCIDR reports whether s parses as CIDR notation.
func isCIDR(s string) bool {
	_, _, err := net.ParseCIDR(s)
	return err == nil
}

// parseAllowedSubnets validates and deduplicates subnet specifications.
// Each entry may be a CIDR (kept as written) or a bare IP (normalized to a
// /32 or /128 single-host CIDR). Blank entries are dropped; any entry that
// is neither a CIDR nor an IP aborts with an error.
func parseAllowedSubnets(cfg []string) ([]string, error) {
	var normalized []string
	seen := map[string]struct{}{}

	// add appends entry unless an identical one was already accepted.
	add := func(entry string) {
		if _, dup := seen[entry]; dup {
			return
		}
		seen[entry] = struct{}{}
		normalized = append(normalized, entry)
	}

	for _, raw := range cfg {
		entry := strings.TrimSpace(raw)
		switch {
		case entry == "":
			// Ignore empty / whitespace-only entries.
		case isCIDR(entry):
			add(entry)
		default:
			ip := net.ParseIP(entry)
			if ip == nil {
				return nil, fmt.Errorf("invalid subnet or address: %s", entry)
			}
			if ip.To4() != nil {
				add(entry + "/32")
			} else {
				add(entry + "/128")
			}
		}
	}

	return normalized, nil
}
diff --git a/cmd/pulse-sensor-proxy/main.go b/cmd/pulse-sensor-proxy/main.go
new file mode 100644
index 000000000..cbcc0c726
--- /dev/null
+++ b/cmd/pulse-sensor-proxy/main.go
@@ -0,0 +1,732 @@
+package main
+
+import (
+ "context"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "os"
+ "os/signal"
+ "path/filepath"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/rs/zerolog"
+ "github.com/rs/zerolog/log"
+ "github.com/spf13/cobra"
+)
+
+// Version information (set at build time with -ldflags)
+var (
+	Version   = "dev"
+	BuildTime = "unknown"
+	GitCommit = "unknown"
+)
+
+// Default filesystem locations and the request size cap. Each path can be
+// overridden via the corresponding PULSE_SENSOR_PROXY_* environment
+// variable (see runProxy).
+const (
+	defaultSocketPath = "/run/pulse-sensor-proxy/pulse-sensor-proxy.sock"
+	defaultSSHKeyPath = "/var/lib/pulse-sensor-proxy/ssh"
+	defaultConfigPath = "/etc/pulse-sensor-proxy/config.yaml"
+	maxRequestBytes   = 16 * 1024 // 16 KiB max request size
+)
+
+// rootCmd runs the proxy daemon when invoked with no subcommand.
+var rootCmd = &cobra.Command{
+	Use:     "pulse-sensor-proxy",
+	Short:   "Pulse Sensor Proxy - Secure sensor data bridge for containerized Pulse",
+	Long:    `Sensor monitoring proxy that keeps SSH keys on the host and exposes sensor data via unix socket`,
+	Version: Version,
+	Run: func(cmd *cobra.Command, args []string) {
+		runProxy()
+	},
+}
+
+// versionCmd prints the build metadata injected via -ldflags; "unknown"
+// fields are omitted.
+var versionCmd = &cobra.Command{
+	Use:   "version",
+	Short: "Print version information",
+	Run: func(cmd *cobra.Command, args []string) {
+		fmt.Printf("pulse-sensor-proxy %s\n", Version)
+		if BuildTime != "unknown" {
+			fmt.Printf("Built: %s\n", BuildTime)
+		}
+		if GitCommit != "unknown" {
+			fmt.Printf("Commit: %s\n", GitCommit)
+		}
+	},
+}
+
+// init wires the version subcommand onto the root command.
+func init() {
+	rootCmd.AddCommand(versionCmd)
+}
+
+// main executes the CLI; any execution error is reported on stderr with a
+// non-zero exit status.
+func main() {
+	if err := rootCmd.Execute(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+		os.Exit(1)
+	}
+}
+
+// Proxy manages the sensor-monitoring proxy: the unix-socket RPC listener,
+// per-peer rate limiting, per-node concurrency gating, and metrics.
+type Proxy struct {
+	socketPath  string
+	sshKeyPath  string
+	listener    net.Listener
+	rateLimiter *rateLimiter // per-peer throttling, consulted in handleConnection
+	nodeGate    *nodeGate    // serializes SSH fetches per node (see handleGetTemperatureV2)
+	router      map[string]handlerFunc // RPC method name -> handler
+	config      *Config
+	metrics     *ProxyMetrics
+}
+
+// RPC request types
+const (
+	RPCEnsureClusterKeys = "ensure_cluster_keys" // discover cluster + push SSH keys
+	RPCRegisterNodes     = "register_nodes"      // discover nodes + test SSH reachability
+	RPCGetTemperature    = "get_temperature"     // fetch sensor data for one node
+	RPCGetStatus         = "get_status"          // version + public-key info
+)
+
+// RPCRequest represents a request from Pulse. CorrelationID is optional and
+// echoed back (after sanitization) so clients can match responses.
+type RPCRequest struct {
+	CorrelationID string                 `json:"correlation_id,omitempty"`
+	Method        string                 `json:"method"`
+	Params        map[string]interface{} `json:"params"`
+}
+
+// RPCResponse represents a response to Pulse. Exactly one of Data (on
+// success) or Error (on failure) is populated.
+type RPCResponse struct {
+	CorrelationID string      `json:"correlation_id,omitempty"`
+	Success       bool        `json:"success"`
+	Data          interface{} `json:"data,omitempty"`
+	Error         string      `json:"error,omitempty"`
+}
+
+// handlerFunc is the signature for RPC method handlers registered in
+// Proxy.router; the returned value becomes RPCResponse.Data.
+type handlerFunc func(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error)
+
+func runProxy() {
+ // Initialize logger
+ zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
+ log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
+
+ socketPath := os.Getenv("PULSE_SENSOR_PROXY_SOCKET")
+ if socketPath == "" {
+ socketPath = defaultSocketPath
+ }
+
+ sshKeyPath := os.Getenv("PULSE_SENSOR_PROXY_SSH_DIR")
+ if sshKeyPath == "" {
+ sshKeyPath = defaultSSHKeyPath
+ }
+
+ // Load configuration
+ configPath := os.Getenv("PULSE_SENSOR_PROXY_CONFIG")
+ if configPath == "" {
+ configPath = defaultConfigPath
+ }
+
+ cfg, err := loadConfig(configPath)
+ if err != nil {
+ log.Fatal().Err(err).Msg("Failed to load configuration")
+ }
+
+ // Initialize metrics
+ metrics := NewProxyMetrics(Version)
+
+ log.Info().
+ Str("socket", socketPath).
+ Str("ssh_key_dir", sshKeyPath).
+ Str("config_path", configPath).
+ Str("version", Version).
+ Msg("Starting pulse-sensor-proxy")
+
+ proxy := &Proxy{
+ socketPath: socketPath,
+ sshKeyPath: sshKeyPath,
+ rateLimiter: newRateLimiter(),
+ nodeGate: newNodeGate(),
+ config: cfg,
+ metrics: metrics,
+ }
+
+ // Register RPC method handlers
+ proxy.router = map[string]handlerFunc{
+ RPCGetStatus: proxy.handleGetStatusV2,
+ RPCEnsureClusterKeys: proxy.handleEnsureClusterKeysV2,
+ RPCRegisterNodes: proxy.handleRegisterNodesV2,
+ RPCGetTemperature: proxy.handleGetTemperatureV2,
+ }
+
+ if err := proxy.Start(); err != nil {
+ log.Fatal().Err(err).Msg("Failed to start proxy")
+ }
+
+ // Start metrics server
+ if err := metrics.Start(cfg.MetricsAddress); err != nil {
+ log.Fatal().Err(err).Msg("Failed to start metrics server")
+ }
+
+ // Setup signal handlers
+ sigChan := make(chan os.Signal, 1)
+ signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+
+ <-sigChan
+ log.Info().Msg("Shutting down proxy...")
+ proxy.Stop()
+ proxy.rateLimiter.shutdown()
+ metrics.Shutdown(context.Background())
+ log.Info().Msg("Proxy stopped")
+}
+
+// Start initializes and starts the proxy: it ensures the SSH key directory
+// and keypair exist, (re)creates the unix socket, tightens its permissions,
+// and begins accepting connections on a background goroutine.
+func (p *Proxy) Start() error {
+	// Create SSH key directory if it doesn't exist (0700: keys are secrets).
+	if err := os.MkdirAll(p.sshKeyPath, 0700); err != nil {
+		return fmt.Errorf("failed to create SSH key directory: %w", err)
+	}
+
+	// Ensure SSH keypair exists
+	if err := p.ensureSSHKeypair(); err != nil {
+		return fmt.Errorf("failed to ensure SSH keypair: %w", err)
+	}
+
+	// Remove existing socket if it exists (stale socket from a prior run
+	// would make Listen fail with "address already in use").
+	if err := os.RemoveAll(p.socketPath); err != nil {
+		return fmt.Errorf("failed to remove existing socket: %w", err)
+	}
+
+	// Create socket directory if needed
+	socketDir := filepath.Dir(p.socketPath)
+	if err := os.MkdirAll(socketDir, 0755); err != nil {
+		return fmt.Errorf("failed to create socket directory: %w", err)
+	}
+
+	// Create unix socket listener
+	listener, err := net.Listen("unix", p.socketPath)
+	if err != nil {
+		return fmt.Errorf("failed to create unix socket: %w", err)
+	}
+	p.listener = listener
+
+	// Set socket permissions to owner+group only
+	// We use SO_PEERCRED for authentication, so we don't need world-readable
+	// NOTE(review): between Listen and Chmod the socket briefly has
+	// umask-derived permissions — consider setting the umask or creating the
+	// socket inside a 0750 directory; confirm deployment expectations.
+	if err := os.Chmod(p.socketPath, 0660); err != nil {
+		log.Warn().Err(err).Msg("Failed to set socket permissions")
+	}
+
+	log.Info().Str("socket", p.socketPath).Msg("Unix socket ready")
+
+	// Start accepting connections
+	go p.acceptConnections()
+
+	return nil
+}
+
+// Stop shuts down the proxy
+func (p *Proxy) Stop() {
+ if p.listener != nil {
+ p.listener.Close()
+ os.Remove(p.socketPath)
+ }
+}
+
+// acceptConnections handles incoming socket connections
+func (p *Proxy) acceptConnections() {
+ for {
+ conn, err := p.listener.Accept()
+ if err != nil {
+ // Check if listener was closed
+ if opErr, ok := err.(*net.OpError); ok && opErr.Err.Error() == "use of closed network connection" {
+ return
+ }
+ log.Error().Err(err).Msg("Failed to accept connection")
+ continue
+ }
+
+ go p.handleConnection(conn)
+ }
+}
+
+// handleConnection processes a single RPC request with full validation and throttling
+func (p *Proxy) handleConnection(conn net.Conn) {
+ defer conn.Close()
+
+ // Track concurrent requests
+ p.metrics.queueDepth.Inc()
+ defer p.metrics.queueDepth.Dec()
+
+ // Start timing for latency metrics
+ startTime := time.Now()
+
+ ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+ defer cancel()
+
+ // Set read deadline
+ if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
+ log.Warn().Err(err).Msg("Failed to set read deadline")
+ }
+
+ // Extract and verify peer credentials
+ cred, err := extractPeerCredentials(conn)
+ if err != nil {
+ log.Warn().Err(err).Msg("Peer credentials unavailable")
+ p.sendErrorV2(conn, "unauthorized", "")
+ return
+ }
+
+ // Check rate limit and concurrency
+ releaseLimiter, ok := p.rateLimiter.allow(peerID{uid: cred.uid, pid: cred.pid})
+ if !ok {
+ p.metrics.rateLimitHits.Inc()
+ log.Warn().
+ Uint32("uid", cred.uid).
+ Uint32("pid", cred.pid).
+ Msg("Rate limit exceeded")
+ p.sendErrorV2(conn, "rate limit exceeded", "")
+ return
+ }
+ defer releaseLimiter()
+
+ // Limit request size and decode
+ lr := io.LimitReader(conn, maxRequestBytes)
+ decoder := json.NewDecoder(lr)
+ decoder.DisallowUnknownFields()
+
+ var req RPCRequest
+ if err := decoder.Decode(&req); err != nil {
+ if errors.Is(err, io.EOF) || err.Error() == "EOF" {
+ p.sendErrorV2(conn, "empty request", "")
+ return
+ }
+ p.sendErrorV2(conn, "invalid request format", "")
+ return
+ }
+
+ // Check if payload was too large
+ if decoder.More() {
+ p.sendErrorV2(conn, "payload too large", req.CorrelationID)
+ return
+ }
+
+ // Sanitize correlation ID
+ req.CorrelationID = sanitizeCorrelationID(req.CorrelationID)
+
+ // Create contextual logger
+ logger := log.With().
+ Str("corr_id", req.CorrelationID).
+ Uint32("uid", cred.uid).
+ Uint32("pid", cred.pid).
+ Str("method", req.Method).
+ Logger()
+
+ // Prepare response
+ resp := RPCResponse{
+ CorrelationID: req.CorrelationID,
+ Success: false,
+ }
+
+ // Find handler
+ handler := p.router[req.Method]
+ if handler == nil {
+ resp.Error = "unknown method"
+ logger.Warn().Msg("Unknown method")
+ p.sendResponse(conn, resp)
+ return
+ }
+
+ // Execute handler
+ result, err := handler(ctx, &req, logger)
+ if err != nil {
+ resp.Error = err.Error()
+ logger.Warn().Err(err).Msg("Handler failed")
+ p.sendResponse(conn, resp)
+ // Record failed request
+ p.metrics.rpcRequests.WithLabelValues(req.Method, "error").Inc()
+ p.metrics.rpcLatency.WithLabelValues(req.Method).Observe(time.Since(startTime).Seconds())
+ return
+ }
+
+ // Success
+ resp.Success = true
+ resp.Data = result
+ logger.Info().Msg("Request completed")
+ p.sendResponse(conn, resp)
+
+ // Record successful request
+ p.metrics.rpcRequests.WithLabelValues(req.Method, "success").Inc()
+ p.metrics.rpcLatency.WithLabelValues(req.Method).Observe(time.Since(startTime).Seconds())
+}
+
+// sendError sends an error response (legacy function)
+func (p *Proxy) sendError(conn net.Conn, message string) {
+ resp := RPCResponse{
+ Success: false,
+ Error: message,
+ }
+ encoder := json.NewEncoder(conn)
+ encoder.Encode(resp)
+}
+
+// sendErrorV2 sends an error response with correlation ID
+func (p *Proxy) sendErrorV2(conn net.Conn, message, correlationID string) {
+ resp := RPCResponse{
+ CorrelationID: correlationID,
+ Success: false,
+ Error: message,
+ }
+ encoder := json.NewEncoder(conn)
+ encoder.Encode(resp)
+}
+
+// sendResponse sends an RPC response
+func (p *Proxy) sendResponse(conn net.Conn, resp RPCResponse) {
+ encoder := json.NewEncoder(conn)
+ if err := encoder.Encode(resp); err != nil {
+ log.Error().Err(err).Msg("Failed to encode RPC response")
+ }
+}
+
+// handleGetStatus returns proxy status
+func (p *Proxy) handleGetStatus(req RPCRequest) RPCResponse {
+ pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
+ pubKey, err := os.ReadFile(pubKeyPath)
+ if err != nil {
+ return RPCResponse{
+ Success: false,
+ Error: fmt.Sprintf("failed to read public key: %v", err),
+ }
+ }
+
+ return RPCResponse{
+ Success: true,
+ Data: map[string]interface{}{
+ "version": Version,
+ "public_key": string(pubKey),
+ "ssh_dir": p.sshKeyPath,
+ },
+ }
+}
+
+// ensureSSHKeypair generates SSH keypair if it doesn't exist
+func (p *Proxy) ensureSSHKeypair() error {
+ privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
+ pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
+
+ // Check if keypair already exists
+ if _, err := os.Stat(privKeyPath); err == nil {
+ if _, err := os.Stat(pubKeyPath); err == nil {
+ log.Info().Msg("SSH keypair already exists")
+ return nil
+ }
+ }
+
+ log.Info().Msg("Generating new SSH keypair")
+
+ // Generate ed25519 keypair using ssh-keygen
+ cmd := fmt.Sprintf("ssh-keygen -t ed25519 -f %s -N '' -C 'pulse-sensor-proxy'", privKeyPath)
+ if output, err := execCommand(cmd); err != nil {
+ return fmt.Errorf("failed to generate SSH keypair: %w (output: %s)", err, output)
+ }
+
+ log.Info().Str("path", privKeyPath).Msg("SSH keypair generated")
+ return nil
+}
+
+// handleEnsureClusterKeys discovers cluster nodes and pushes SSH keys.
+// Legacy, non-context variant of handleEnsureClusterKeysV2: unlike V2 it
+// does not validate node names and always pushes from the default key dir.
+// Returns per-node results plus success/total counts.
+func (p *Proxy) handleEnsureClusterKeys(req RPCRequest) RPCResponse {
+	// Check if we're on a Proxmox host
+	if !isProxmoxHost() {
+		return RPCResponse{
+			Success: false,
+			Error:   "not running on Proxmox host - cannot discover cluster",
+		}
+	}
+
+	// Discover cluster nodes
+	nodes, err := discoverClusterNodes()
+	if err != nil {
+		return RPCResponse{
+			Success: false,
+			Error:   fmt.Sprintf("failed to discover cluster: %v", err),
+		}
+	}
+
+	log.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes")
+
+	// Push SSH key to each node; failures are recorded per node rather than
+	// aborting the whole operation.
+	results := make(map[string]interface{})
+	successCount := 0
+	for _, node := range nodes {
+		log.Info().Str("node", node).Msg("Pushing SSH key to node")
+		if err := p.pushSSHKey(node); err != nil {
+			log.Error().Err(err).Str("node", node).Msg("Failed to push SSH key")
+			results[node] = map[string]interface{}{
+				"success": false,
+				"error":   err.Error(),
+			}
+		} else {
+			log.Info().Str("node", node).Msg("SSH key pushed successfully")
+			results[node] = map[string]interface{}{
+				"success": true,
+			}
+			successCount++
+		}
+	}
+
+	return RPCResponse{
+		Success: true,
+		Data: map[string]interface{}{
+			"nodes":         nodes,
+			"results":       results,
+			"success_count": successCount,
+			"total_count":   len(nodes),
+		},
+	}
+}
+
+// handleRegisterNodes returns discovered nodes with an SSH reachability flag
+// per node. Legacy, non-context variant of handleRegisterNodesV2; unlike V2
+// it does not validate node names before probing.
+func (p *Proxy) handleRegisterNodes(req RPCRequest) RPCResponse {
+	// Check if we're on a Proxmox host
+	if !isProxmoxHost() {
+		return RPCResponse{
+			Success: false,
+			Error:   "not running on Proxmox host",
+		}
+	}
+
+	// Discover cluster nodes
+	nodes, err := discoverClusterNodes()
+	if err != nil {
+		return RPCResponse{
+			Success: false,
+			Error:   fmt.Sprintf("failed to discover nodes: %v", err),
+		}
+	}
+
+	// Test SSH connectivity to each node
+	nodeStatus := make([]map[string]interface{}, 0, len(nodes))
+	for _, node := range nodes {
+		status := map[string]interface{}{
+			"name": node,
+		}
+
+		if err := p.testSSHConnection(node); err != nil {
+			status["ssh_ready"] = false
+			status["error"] = err.Error()
+		} else {
+			status["ssh_ready"] = true
+		}
+
+		nodeStatus = append(nodeStatus, status)
+	}
+
+	return RPCResponse{
+		Success: true,
+		Data: map[string]interface{}{
+			"nodes": nodeStatus,
+		},
+	}
+}
+
+// handleGetTemperature fetches temperature data from a node via SSH.
+// Legacy, non-context variant of handleGetTemperatureV2; unlike V2 it does
+// not validate the node name and does not serialize per-node access.
+func (p *Proxy) handleGetTemperature(req RPCRequest) RPCResponse {
+	// Extract node parameter
+	nodeParam, ok := req.Params["node"]
+	if !ok {
+		return RPCResponse{
+			Success: false,
+			Error:   "missing 'node' parameter",
+		}
+	}
+
+	node, ok := nodeParam.(string)
+	if !ok {
+		return RPCResponse{
+			Success: false,
+			Error:   "'node' parameter must be a string",
+		}
+	}
+
+	// Fetch temperature data
+	tempData, err := p.getTemperatureViaSSH(node)
+	if err != nil {
+		return RPCResponse{
+			Success: false,
+			Error:   fmt.Sprintf("failed to get temperatures: %v", err),
+		}
+	}
+
+	return RPCResponse{
+		Success: true,
+		Data: map[string]interface{}{
+			"node":        node,
+			"temperature": tempData,
+		},
+	}
+}
+
+// New V2 handlers with context and structured logging
+
+// handleGetStatusV2 returns proxy status with context support
+func (p *Proxy) handleGetStatusV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
+ pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
+ pubKey, err := os.ReadFile(pubKeyPath)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read public key: %w", err)
+ }
+
+ logger.Info().Msg("Status request served")
+ return map[string]interface{}{
+ "version": Version,
+ "public_key": string(pubKey),
+ "ssh_dir": p.sshKeyPath,
+ }, nil
+}
+
+// handleEnsureClusterKeysV2 discovers cluster nodes and pushes SSH keys,
+// validating each node name first. The optional "key_dir" parameter selects
+// an alternate key directory (used for key rotation).
+// NOTE(review): key_dir arrives from the RPC client unvalidated; peers are
+// already authenticated via SO_PEERCRED, but constraining it to known key
+// directories would be safer — confirm the rotation flow before tightening.
+// ctx is accepted for handler-signature uniformity but is not yet
+// propagated into the SSH operations.
+func (p *Proxy) handleEnsureClusterKeysV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
+	// Check if we're on a Proxmox host
+	if !isProxmoxHost() {
+		return nil, fmt.Errorf("not running on Proxmox host - cannot discover cluster")
+	}
+
+	// Check for optional key_dir parameter (for key rotation)
+	keyDir := p.sshKeyPath // default
+	if keyDirParam, ok := req.Params["key_dir"]; ok {
+		if keyDirStr, ok := keyDirParam.(string); ok && keyDirStr != "" {
+			keyDir = keyDirStr
+			logger.Info().Str("key_dir", keyDir).Msg("Using custom key directory for rotation")
+		}
+	}
+
+	// Discover cluster nodes
+	nodes, err := discoverClusterNodes()
+	if err != nil {
+		return nil, fmt.Errorf("failed to discover cluster: %w", err)
+	}
+
+	logger.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes")
+
+	// Push SSH key to each node; per-node failures are recorded, not fatal.
+	results := make(map[string]interface{})
+	successCount := 0
+	for _, node := range nodes {
+		// Validate node name
+		if err := validateNodeName(node); err != nil {
+			logger.Warn().Str("node", node).Msg("Invalid node name format")
+			results[node] = map[string]interface{}{
+				"success": false,
+				"error":   "invalid node name",
+			}
+			continue
+		}
+
+		logger.Info().Str("node", node).Str("key_dir", keyDir).Msg("Pushing SSH key to node")
+		if err := p.pushSSHKeyFrom(node, keyDir); err != nil {
+			logger.Error().Err(err).Str("node", node).Msg("Failed to push SSH key")
+			results[node] = map[string]interface{}{
+				"success": false,
+				"error":   err.Error(),
+			}
+		} else {
+			logger.Info().Str("node", node).Msg("SSH key pushed successfully")
+			results[node] = map[string]interface{}{
+				"success": true,
+			}
+			successCount++
+		}
+	}
+
+	return map[string]interface{}{
+		"nodes":         nodes,
+		"results":       results,
+		"success_count": successCount,
+		"total_count":   len(nodes),
+	}, nil
+}
+
+// handleRegisterNodesV2 returns discovered cluster nodes with a per-node SSH
+// reachability flag; names failing validateNodeName are reported rather than
+// probed. ctx is accepted but not yet propagated into the SSH checks.
+func (p *Proxy) handleRegisterNodesV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
+	// Check if we're on a Proxmox host
+	if !isProxmoxHost() {
+		return nil, fmt.Errorf("not running on Proxmox host")
+	}
+
+	// Discover cluster nodes
+	nodes, err := discoverClusterNodes()
+	if err != nil {
+		return nil, fmt.Errorf("failed to discover nodes: %w", err)
+	}
+
+	// Test SSH connectivity to each node
+	nodeStatus := make([]map[string]interface{}, 0, len(nodes))
+	for _, node := range nodes {
+		status := map[string]interface{}{
+			"name": node,
+		}
+
+		// Validate node name before letting it anywhere near SSH.
+		if err := validateNodeName(node); err != nil {
+			status["ssh_ready"] = false
+			status["error"] = "invalid node name"
+			nodeStatus = append(nodeStatus, status)
+			continue
+		}
+
+		if err := p.testSSHConnection(node); err != nil {
+			status["ssh_ready"] = false
+			status["error"] = err.Error()
+		} else {
+			status["ssh_ready"] = true
+		}
+
+		nodeStatus = append(nodeStatus, status)
+	}
+
+	logger.Info().Int("node_count", len(nodeStatus)).Msg("Node discovery completed")
+	return map[string]interface{}{
+		"nodes": nodeStatus,
+	}, nil
+}
+
+// handleGetTemperatureV2 fetches sensor data for a single validated node.
+// A per-node gate serializes concurrent fetches to the same node.
+// ctx is accepted but not yet passed down to getTemperatureViaSSH.
+func (p *Proxy) handleGetTemperatureV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
+	// Extract node parameter
+	nodeParam, ok := req.Params["node"]
+	if !ok {
+		return nil, fmt.Errorf("missing 'node' parameter")
+	}
+
+	node, ok := nodeParam.(string)
+	if !ok {
+		return nil, fmt.Errorf("'node' parameter must be a string")
+	}
+
+	// Trim and validate node name (generic error returned to the client;
+	// the offending value is only logged)
+	node = strings.TrimSpace(node)
+	if err := validateNodeName(node); err != nil {
+		logger.Warn().Str("node", node).Msg("Invalid node name format")
+		return nil, fmt.Errorf("invalid node name")
+	}
+
+	// Acquire per-node concurrency lock (prevents multiple simultaneous requests to same node)
+	releaseNode := p.nodeGate.acquire(node)
+	defer releaseNode()
+
+	logger.Debug().Str("node", node).Msg("Fetching temperature via SSH")
+
+	// Fetch temperature data
+	tempData, err := p.getTemperatureViaSSH(node)
+	if err != nil {
+		logger.Warn().Err(err).Str("node", node).Msg("Failed to get temperatures")
+		return nil, fmt.Errorf("failed to get temperatures: %w", err)
+	}
+
+	logger.Info().Str("node", node).Msg("Temperature data fetched successfully")
+	return map[string]interface{}{
+		"node":        node,
+		"temperature": tempData,
+	}, nil
+}
diff --git a/cmd/pulse-sensor-proxy/metrics.go b/cmd/pulse-sensor-proxy/metrics.go
new file mode 100644
index 000000000..48955f1a9
--- /dev/null
+++ b/cmd/pulse-sensor-proxy/metrics.go
@@ -0,0 +1,167 @@
+package main
+
+import (
+ "context"
+ "net"
+ "net/http"
+ "strings"
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promhttp"
+ "github.com/rs/zerolog/log"
+)
+
+// defaultMetricsAddr is used when the configured metrics address is the
+// "default" sentinel (see Config.MetricsAddress and ProxyMetrics.Start).
+const defaultMetricsAddr = "127.0.0.1:9127"
+
+// ProxyMetrics holds Prometheus metrics for the proxy, plus the private
+// registry and the HTTP server that exposes them on /metrics.
+type ProxyMetrics struct {
+	rpcRequests   *prometheus.CounterVec   // per method/result request counts
+	rpcLatency    *prometheus.HistogramVec // per-method handler latency
+	sshRequests   *prometheus.CounterVec   // per node/result SSH executions
+	sshLatency    *prometheus.HistogramVec // per-node SSH latency
+	queueDepth    prometheus.Gauge         // concurrent in-flight RPCs
+	rateLimitHits prometheus.Counter       // requests rejected by the limiter
+	buildInfo     *prometheus.GaugeVec     // constant 1, labeled by version
+	server        *http.Server
+	registry      *prometheus.Registry
+}
+
+// NewProxyMetrics creates all metrics on a private registry (so the /metrics
+// endpoint serves only proxy metrics, independent of the default registry)
+// and sets the build_info gauge to 1 for the given version.
+func NewProxyMetrics(version string) *ProxyMetrics {
+	reg := prometheus.NewRegistry()
+
+	pm := &ProxyMetrics{
+		rpcRequests: prometheus.NewCounterVec(
+			prometheus.CounterOpts{
+				Name: "pulse_proxy_rpc_requests_total",
+				Help: "Total RPC requests handled by method and result.",
+			},
+			[]string{"method", "result"},
+		),
+		rpcLatency: prometheus.NewHistogramVec(
+			prometheus.HistogramOpts{
+				Name:    "pulse_proxy_rpc_latency_seconds",
+				Help:    "RPC handler latency.",
+				Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2.5, 5},
+			},
+			[]string{"method"},
+		),
+		sshRequests: prometheus.NewCounterVec(
+			prometheus.CounterOpts{
+				Name: "pulse_proxy_ssh_requests_total",
+				Help: "SSH command executions by node and result.",
+			},
+			[]string{"node", "result"},
+		),
+		sshLatency: prometheus.NewHistogramVec(
+			prometheus.HistogramOpts{
+				Name:    "pulse_proxy_ssh_latency_seconds",
+				Help:    "SSH command latency per node.",
+				// SSH round-trips are much slower than local RPC handling,
+				// hence the coarser buckets.
+				Buckets: []float64{0.1, 0.5, 1, 2.5, 5, 10, 30},
+			},
+			[]string{"node"},
+		),
+		queueDepth: prometheus.NewGauge(
+			prometheus.GaugeOpts{
+				Name: "pulse_proxy_queue_depth",
+				Help: "Concurrent RPC requests being processed.",
+			},
+		),
+		rateLimitHits: prometheus.NewCounter(
+			prometheus.CounterOpts{
+				Name: "pulse_proxy_rate_limit_hits_total",
+				Help: "Number of RPC requests rejected due to rate limiting.",
+			},
+		),
+		buildInfo: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Name: "pulse_proxy_build_info",
+				Help: "Proxy build metadata.",
+			},
+			[]string{"version"},
+		),
+		registry: reg,
+	}
+
+	reg.MustRegister(
+		pm.rpcRequests,
+		pm.rpcLatency,
+		pm.sshRequests,
+		pm.sshLatency,
+		pm.queueDepth,
+		pm.rateLimitHits,
+		pm.buildInfo,
+	)
+
+	pm.buildInfo.WithLabelValues(version).Set(1)
+
+	return pm
+}
+
+// Start starts the metrics HTTP server on addr. An empty or "disabled"
+// address turns metrics off; "default" resolves to defaultMetricsAddr.
+// The listener is bound synchronously so bind errors are returned here,
+// while Serve runs on a background goroutine.
+func (m *ProxyMetrics) Start(addr string) error {
+	if addr == "" || strings.ToLower(addr) == "disabled" {
+		log.Info().Msg("Metrics server disabled")
+		return nil
+	}
+
+	if addr == "default" {
+		addr = defaultMetricsAddr
+	}
+
+	mux := http.NewServeMux()
+	mux.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))
+
+	ln, err := net.Listen("tcp", addr)
+	if err != nil {
+		return err
+	}
+
+	// Addr is informational only: Serve below uses the pre-bound listener.
+	m.server = &http.Server{
+		Addr:              addr,
+		Handler:           mux,
+		ReadHeaderTimeout: 5 * time.Second,
+	}
+
+	go func() {
+		if err := m.server.Serve(ln); err != nil && err != http.ErrServerClosed {
+			log.Error().Err(err).Str("addr", addr).Msg("Metrics server stopped unexpectedly")
+		}
+	}()
+
+	log.Info().Str("addr", addr).Msg("Metrics server started")
+	return nil
+}
+
+// Shutdown gracefully shuts down the metrics server
+func (m *ProxyMetrics) Shutdown(ctx context.Context) {
+ if m.server != nil {
+ _ = m.server.Shutdown(ctx)
+ }
+}
+
// sanitizeNodeLabel converts a node name into a safe Prometheus label value:
// lowercase, with anything outside [a-z0-9._-] replaced by '_', truncated to
// 63 bytes, and "unknown" when the input maps to an empty string.
func sanitizeNodeLabel(node string) string {
	const maxLen = 63

	mapped := strings.Map(func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z',
			r >= '0' && r <= '9',
			r == '-', r == '_', r == '.':
			return r
		default:
			return '_'
		}
	}, strings.ToLower(node))

	// All runes are ASCII after mapping, so byte truncation is safe.
	if len(mapped) > maxLen {
		mapped = mapped[:maxLen]
	}
	if mapped == "" {
		return "unknown"
	}
	return mapped
}
diff --git a/cmd/pulse-sensor-proxy/ssh.go b/cmd/pulse-sensor-proxy/ssh.go
new file mode 100644
index 000000000..69bbdf387
--- /dev/null
+++ b/cmd/pulse-sensor-proxy/ssh.go
@@ -0,0 +1,242 @@
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "time"
+
+ "github.com/rs/zerolog/log"
+)
+
+// execCommand executes a shell command and returns output
+func execCommand(cmd string) (string, error) {
+ out, err := exec.Command("sh", "-c", cmd).CombinedOutput()
+ return string(out), err
+}
+
+// getPublicKey reads the SSH public key from the default directory
+func (p *Proxy) getPublicKey() (string, error) {
+ return p.getPublicKeyFrom(p.sshKeyPath)
+}
+
+// getPublicKeyFrom reads the SSH public key from a specific directory
+func (p *Proxy) getPublicKeyFrom(keyDir string) (string, error) {
+ pubKeyPath := filepath.Join(keyDir, "id_ed25519.pub")
+ data, err := os.ReadFile(pubKeyPath)
+ if err != nil {
+ return "", err
+ }
+ return strings.TrimSpace(string(data)), nil
+}
+
+// buildAuthorizedKey constructs an authorized_keys entry with from= IP restrictions
+func (p *Proxy) buildAuthorizedKey(pubKey string) (string, error) {
+ subnets := p.config.AllowedSourceSubnets
+ if len(subnets) == 0 {
+ return "", fmt.Errorf("no allowed source subnets configured or detected")
+ }
+
+ // Build from= clause with all allowed subnets
+ fromClause := fmt.Sprintf(`from="%s"`, strings.Join(subnets, ","))
+
+ // Comment helps identify and upgrade this key later
+ const comment = "pulse-sensor-proxy"
+
+ // Forced command with all restrictions
+ const forced = `command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty`
+
+ // Format: from="...",command="...",no-* ssh-rsa AAAA... pulse-sensor-proxy
+ return fmt.Sprintf(`%s,%s %s %s`, fromClause, forced, pubKey, comment), nil
+}
+
+// pushSSHKeyFrom pushes a public key from a specific directory to a node
+func (p *Proxy) pushSSHKeyFrom(nodeHost, keyDir string) error {
+ startTime := time.Now()
+ nodeLabel := sanitizeNodeLabel(nodeHost)
+
+ pubKey, err := p.getPublicKeyFrom(keyDir)
+ if err != nil {
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return fmt.Errorf("failed to get public key from %s: %w", keyDir, err)
+ }
+
+ // Build the restricted authorized_keys entry
+ entry, err := p.buildAuthorizedKey(pubKey)
+ if err != nil {
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return fmt.Errorf("failed to build authorized key: %w", err)
+ }
+
+ // Check if the exact restricted entry already exists
+ checkCmd := fmt.Sprintf(
+ `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "grep -F '%s' /root/.ssh/authorized_keys 2>/dev/null"`,
+ nodeHost,
+ entry,
+ )
+
+ if output, _ := execCommand(checkCmd); strings.Contains(output, entry) {
+ log.Debug().Str("node", nodeHost).Msg("SSH key already present with from= restrictions")
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return nil
+ }
+
+ // Remove old pulse-temp-proxy and pulse-sensor-proxy entries (for upgrade path)
+ removeOldCmd := fmt.Sprintf(
+ `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "mkdir -p /root/.ssh && chmod 700 /root/.ssh && grep -v -e 'pulse-temp-proxy$' -e 'pulse-sensor-proxy$' /root/.ssh/authorized_keys > /root/.ssh/authorized_keys.tmp 2>/dev/null || touch /root/.ssh/authorized_keys.tmp"`,
+ nodeHost,
+ )
+
+ if _, err := execCommand(removeOldCmd); err != nil {
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return fmt.Errorf("failed to prepare authorized_keys on %s: %w", nodeHost, err)
+ }
+
+ // Add the new restricted key and atomically replace the file
+ addCmd := fmt.Sprintf(
+ `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "echo '%s' >> /root/.ssh/authorized_keys.tmp && mv /root/.ssh/authorized_keys.tmp /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"`,
+ nodeHost,
+ entry,
+ )
+
+ if _, err := execCommand(addCmd); err != nil {
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return fmt.Errorf("failed to add SSH key to %s: %w", nodeHost, err)
+ }
+
+ log.Info().
+ Str("node", nodeHost).
+ Str("key_dir", keyDir).
+ Strs("allowed_subnets", p.config.AllowedSourceSubnets).
+ Msg("SSH key installed with from= IP restrictions")
+
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return nil
+}
+
+// pushSSHKey adds the proxy's public key to a node's authorized_keys with IP restrictions
+// Automatically upgrades old keys without from= restrictions
+func (p *Proxy) pushSSHKey(nodeHost string) error {
+ return p.pushSSHKeyFrom(nodeHost, p.sshKeyPath)
+}
+
+// testSSHConnection verifies SSH connectivity to a node
+func (p *Proxy) testSSHConnection(nodeHost string) error {
+ startTime := time.Now()
+ nodeLabel := sanitizeNodeLabel(nodeHost)
+
+ privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
+ cmd := fmt.Sprintf(
+ `ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s "echo test"`,
+ privKeyPath,
+ nodeHost,
+ )
+
+ output, err := execCommand(cmd)
+ if err != nil {
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return fmt.Errorf("SSH test failed: %w (output: %s)", err, output)
+ }
+
+ // The forced command will run "sensors -j" instead of "echo test"
+ // So we should get JSON output, not "test"
+ // For now, just check that connection succeeded
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return nil
+}
+
+// getTemperatureViaSSH fetches temperature data from a node
+func (p *Proxy) getTemperatureViaSSH(nodeHost string) (string, error) {
+ startTime := time.Now()
+ nodeLabel := sanitizeNodeLabel(nodeHost)
+
+ privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
+
+ // Since we use ForceCommand="sensors -j", any SSH command will run sensors
+ // We don't need to specify the command
+ cmd := fmt.Sprintf(
+ `ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s ""`,
+ privKeyPath,
+ nodeHost,
+ )
+
+ output, err := execCommand(cmd)
+ if err != nil {
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return "", fmt.Errorf("failed to fetch temperatures: %w", err)
+ }
+
+ p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc()
+ p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
+ return output, nil
+}
+
+// discoverClusterNodes discovers all nodes in the Proxmox cluster
+func discoverClusterNodes() ([]string, error) {
+ // Check if pvecm is available (only on Proxmox hosts)
+ if _, err := exec.LookPath("pvecm"); err != nil {
+ return nil, fmt.Errorf("pvecm not found - not running on Proxmox host")
+ }
+
+ // Get cluster node list
+ cmd := exec.Command("pvecm", "nodes")
+ var out bytes.Buffer
+ cmd.Stdout = &out
+ if err := cmd.Run(); err != nil {
+ return nil, fmt.Errorf("failed to get cluster nodes: %w", err)
+ }
+
+ // Parse output
+ // Format:
+ // Membership information
+ // ----------------------
+ // Nodeid Votes Name
+ // 1 1 node1
+ // 2 1 node2
+
+ var nodes []string
+ lines := strings.Split(out.String(), "\n")
+ for _, line := range lines {
+ fields := strings.Fields(line)
+ // Skip header lines and empty lines
+ if len(fields) < 3 {
+ continue
+ }
+ // Check if first field is numeric (node ID)
+ if fields[0][0] >= '0' && fields[0][0] <= '9' {
+ nodeName := fields[2]
+ nodes = append(nodes, nodeName)
+ }
+ }
+
+ if len(nodes) == 0 {
+ return nil, fmt.Errorf("no cluster nodes found")
+ }
+
+ return nodes, nil
+}
+
+// isProxmoxHost checks if we're running on a Proxmox host
+func isProxmoxHost() bool {
+ // Check for pvecm command
+ if _, err := exec.LookPath("pvecm"); err == nil {
+ return true
+ }
+ // Check for /etc/pve directory
+ if info, err := os.Stat("/etc/pve"); err == nil && info.IsDir() {
+ return true
+ }
+ return false
+}
diff --git a/cmd/pulse-sensor-proxy/throttle.go b/cmd/pulse-sensor-proxy/throttle.go
new file mode 100644
index 000000000..946de00df
--- /dev/null
+++ b/cmd/pulse-sensor-proxy/throttle.go
@@ -0,0 +1,140 @@
+package main
+
+import (
+ "sync"
+ "time"
+
+ "golang.org/x/time/rate"
+)
+
+// peerID identifies a connecting process by UID+PID
+type peerID struct {
+ uid uint32
+ pid uint32
+}
+
+// limiterEntry holds rate limiting and concurrency controls for a peer
+type limiterEntry struct {
+ limiter *rate.Limiter // throughput: 20/min with burst 10
+ semaphore chan struct{} // concurrency: cap 10
+ lastSeen time.Time
+}
+
+// rateLimiter manages per-peer rate limits and concurrency
+type rateLimiter struct {
+ mu sync.Mutex
+ entries map[peerID]*limiterEntry
+ quitChan chan struct{}
+}
+
+// newRateLimiter creates a new rate limiter with cleanup loop
+func newRateLimiter() *rateLimiter {
+ rl := &rateLimiter{
+ entries: make(map[peerID]*limiterEntry),
+ quitChan: make(chan struct{}),
+ }
+ go rl.cleanupLoop()
+ return rl
+}
+
+// allow checks if a peer is allowed to make a request and reserves a concurrency slot
+// Returns a release function and whether the request is allowed
+func (rl *rateLimiter) allow(id peerID) (release func(), allowed bool) {
+ rl.mu.Lock()
+ entry := rl.entries[id]
+ if entry == nil {
+ entry = &limiterEntry{
+ limiter: rate.NewLimiter(rate.Every(time.Minute/20), 10), // 20/min, burst 10
+ semaphore: make(chan struct{}, 10), // max 10 concurrent
+ }
+ rl.entries[id] = entry
+ }
+ entry.lastSeen = time.Now()
+ rl.mu.Unlock()
+
+ // Check rate limit
+ if !entry.limiter.Allow() {
+ return nil, false
+ }
+
+ // Try to acquire concurrency slot
+ select {
+ case entry.semaphore <- struct{}{}:
+ return func() { <-entry.semaphore }, true
+ default:
+ return nil, false // max concurrent in-flight reached
+ }
+}
+
+// cleanupLoop periodically removes idle peer entries
+func (rl *rateLimiter) cleanupLoop() {
+ ticker := time.NewTicker(5 * time.Minute)
+ defer ticker.Stop()
+ for {
+ select {
+ case <-ticker.C:
+ rl.mu.Lock()
+ for id, entry := range rl.entries {
+ if time.Since(entry.lastSeen) > 10*time.Minute {
+ delete(rl.entries, id)
+ }
+ }
+ rl.mu.Unlock()
+ case <-rl.quitChan:
+ return
+ }
+ }
+}
+
+// shutdown stops the cleanup loop
+func (rl *rateLimiter) shutdown() {
+ close(rl.quitChan)
+}
+
+// nodeGate controls per-node concurrency for temperature requests
+type nodeGate struct {
+ mu sync.Mutex
+ inFlight map[string]*nodeLock
+}
+
+// nodeLock tracks in-flight requests for a specific node
+type nodeLock struct {
+ refCount int
+ guard chan struct{}
+}
+
+// newNodeGate creates a new node concurrency gate
+func newNodeGate() *nodeGate {
+ return &nodeGate{
+ inFlight: make(map[string]*nodeLock),
+ }
+}
+
+// acquire gets exclusive access to make requests to a node
+// Returns a release function that must be called when done
+func (g *nodeGate) acquire(node string) func() {
+ g.mu.Lock()
+ lock := g.inFlight[node]
+ if lock == nil {
+ lock = &nodeLock{
+ guard: make(chan struct{}, 1), // single slot = only one SSH fetch per node
+ }
+ g.inFlight[node] = lock
+ }
+ lock.refCount++
+ g.mu.Unlock()
+
+ // Wait for exclusive access
+ lock.guard <- struct{}{}
+
+ // Return release function
+ return func() {
+ <-lock.guard
+ g.mu.Lock()
+ lock.refCount--
+ if lock.refCount == 0 {
+ delete(g.inFlight, node)
+ }
+ g.mu.Unlock()
+ }
+}
diff --git a/cmd/pulse-sensor-proxy/validation.go b/cmd/pulse-sensor-proxy/validation.go
new file mode 100644
index 000000000..b0fbe13aa
--- /dev/null
+++ b/cmd/pulse-sensor-proxy/validation.go
@@ -0,0 +1,33 @@
+package main
+
+import (
+ "fmt"
+ "regexp"
+
+ "github.com/google/uuid"
+)
+
+var (
+ // nodeNameRegex validates node names (alphanumeric, dots, underscores, hyphens, 1-64 chars)
+ nodeNameRegex = regexp.MustCompile(`^[a-zA-Z0-9._-]{1,64}$`)
+)
+
+// sanitizeCorrelationID validates and sanitizes a correlation ID
+// Returns a valid UUID, generating a new one if input is missing or invalid
+func sanitizeCorrelationID(id string) string {
+ if id == "" {
+ return uuid.NewString()
+ }
+ if _, err := uuid.Parse(id); err != nil {
+ return uuid.NewString()
+ }
+ return id
+}
+
+// validateNodeName checks if a node name is in valid format
+func validateNodeName(name string) error {
+ if !nodeNameRegex.MatchString(name) {
+ return fmt.Errorf("invalid node name")
+ }
+ return nil
+}
diff --git a/cmd/pulse-temp-proxy/main.go b/cmd/pulse-temp-proxy/main.go
deleted file mode 100644
index 504ac26fa..000000000
--- a/cmd/pulse-temp-proxy/main.go
+++ /dev/null
@@ -1,434 +0,0 @@
-package main
-
-import (
- "encoding/json"
- "fmt"
- "net"
- "os"
- "os/signal"
- "path/filepath"
- "syscall"
-
- "github.com/rs/zerolog"
- "github.com/rs/zerolog/log"
- "github.com/spf13/cobra"
-)
-
-// Version information (set at build time with -ldflags)
-var (
- Version = "dev"
- BuildTime = "unknown"
- GitCommit = "unknown"
-)
-
-const (
- defaultSocketPath = "/run/pulse-temp-proxy/pulse-temp-proxy.sock"
- defaultSSHKeyPath = "/var/lib/pulse-temp-proxy/ssh"
-)
-
-var rootCmd = &cobra.Command{
- Use: "pulse-temp-proxy",
- Short: "Pulse Temperature Proxy - Secure SSH bridge for containerized Pulse",
- Long: `Temperature monitoring proxy that keeps SSH keys on the host and exposes temperature data via unix socket`,
- Version: Version,
- Run: func(cmd *cobra.Command, args []string) {
- runProxy()
- },
-}
-
-var versionCmd = &cobra.Command{
- Use: "version",
- Short: "Print version information",
- Run: func(cmd *cobra.Command, args []string) {
- fmt.Printf("pulse-temp-proxy %s\n", Version)
- if BuildTime != "unknown" {
- fmt.Printf("Built: %s\n", BuildTime)
- }
- if GitCommit != "unknown" {
- fmt.Printf("Commit: %s\n", GitCommit)
- }
- },
-}
-
-func init() {
- rootCmd.AddCommand(versionCmd)
-}
-
-func main() {
- if err := rootCmd.Execute(); err != nil {
- fmt.Fprintf(os.Stderr, "Error: %v\n", err)
- os.Exit(1)
- }
-}
-
-// Proxy manages the temperature monitoring proxy
-type Proxy struct {
- socketPath string
- sshKeyPath string
- listener net.Listener
-}
-
-// RPC request types
-const (
- RPCEnsureClusterKeys = "ensure_cluster_keys"
- RPCRegisterNodes = "register_nodes"
- RPCGetTemperature = "get_temperature"
- RPCGetStatus = "get_status"
-)
-
-// RPCRequest represents a request from Pulse
-type RPCRequest struct {
- Method string `json:"method"`
- Params map[string]interface{} `json:"params"`
-}
-
-// RPCResponse represents a response to Pulse
-type RPCResponse struct {
- Success bool `json:"success"`
- Data interface{} `json:"data,omitempty"`
- Error string `json:"error,omitempty"`
-}
-
-func runProxy() {
- // Initialize logger
- zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
- log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
-
- socketPath := os.Getenv("PULSE_TEMP_PROXY_SOCKET")
- if socketPath == "" {
- socketPath = defaultSocketPath
- }
-
- sshKeyPath := os.Getenv("PULSE_TEMP_PROXY_SSH_DIR")
- if sshKeyPath == "" {
- sshKeyPath = defaultSSHKeyPath
- }
-
- log.Info().
- Str("socket", socketPath).
- Str("ssh_key_dir", sshKeyPath).
- Msg("Starting pulse-temp-proxy")
-
- proxy := &Proxy{
- socketPath: socketPath,
- sshKeyPath: sshKeyPath,
- }
-
- if err := proxy.Start(); err != nil {
- log.Fatal().Err(err).Msg("Failed to start proxy")
- }
-
- // Setup signal handlers
- sigChan := make(chan os.Signal, 1)
- signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-
- <-sigChan
- log.Info().Msg("Shutting down proxy...")
- proxy.Stop()
- log.Info().Msg("Proxy stopped")
-}
-
-// Start initializes and starts the proxy
-func (p *Proxy) Start() error {
- // Create SSH key directory if it doesn't exist
- if err := os.MkdirAll(p.sshKeyPath, 0700); err != nil {
- return fmt.Errorf("failed to create SSH key directory: %w", err)
- }
-
- // Ensure SSH keypair exists
- if err := p.ensureSSHKeypair(); err != nil {
- return fmt.Errorf("failed to ensure SSH keypair: %w", err)
- }
-
- // Remove existing socket if it exists
- if err := os.RemoveAll(p.socketPath); err != nil {
- return fmt.Errorf("failed to remove existing socket: %w", err)
- }
-
- // Create socket directory if needed
- socketDir := filepath.Dir(p.socketPath)
- if err := os.MkdirAll(socketDir, 0755); err != nil {
- return fmt.Errorf("failed to create socket directory: %w", err)
- }
-
- // Create unix socket listener
- listener, err := net.Listen("unix", p.socketPath)
- if err != nil {
- return fmt.Errorf("failed to create unix socket: %w", err)
- }
- p.listener = listener
-
- // Set socket permissions to owner+group only
- // We use SO_PEERCRED for authentication, so we don't need world-readable
- if err := os.Chmod(p.socketPath, 0660); err != nil {
- log.Warn().Err(err).Msg("Failed to set socket permissions")
- }
-
- log.Info().Str("socket", p.socketPath).Msg("Unix socket ready")
-
- // Start accepting connections
- go p.acceptConnections()
-
- return nil
-}
-
-// Stop shuts down the proxy
-func (p *Proxy) Stop() {
- if p.listener != nil {
- p.listener.Close()
- os.Remove(p.socketPath)
- }
-}
-
-// acceptConnections handles incoming socket connections
-func (p *Proxy) acceptConnections() {
- for {
- conn, err := p.listener.Accept()
- if err != nil {
- // Check if listener was closed
- if opErr, ok := err.(*net.OpError); ok && opErr.Err.Error() == "use of closed network connection" {
- return
- }
- log.Error().Err(err).Msg("Failed to accept connection")
- continue
- }
-
- go p.handleConnection(conn)
- }
-}
-
-// handleConnection processes a single RPC request
-func (p *Proxy) handleConnection(conn net.Conn) {
- defer conn.Close()
-
- // Verify peer credentials (SO_PEERCRED authentication)
- if err := verifyPeerCredentials(conn); err != nil {
- log.Warn().Err(err).Msg("Unauthorized connection attempt")
- p.sendError(conn, "unauthorized")
- return
- }
-
- // Decode request
- var req RPCRequest
- decoder := json.NewDecoder(conn)
- if err := decoder.Decode(&req); err != nil {
- log.Error().Err(err).Msg("Failed to decode RPC request")
- p.sendError(conn, "invalid request format")
- return
- }
-
- log.Debug().Str("method", req.Method).Msg("Received RPC request")
-
- // Route to handler
- var resp RPCResponse
- switch req.Method {
- case RPCGetStatus:
- resp = p.handleGetStatus(req)
- case RPCEnsureClusterKeys:
- resp = p.handleEnsureClusterKeys(req)
- case RPCRegisterNodes:
- resp = p.handleRegisterNodes(req)
- case RPCGetTemperature:
- resp = p.handleGetTemperature(req)
- default:
- resp = RPCResponse{
- Success: false,
- Error: fmt.Sprintf("unknown method: %s", req.Method),
- }
- }
-
- // Send response
- encoder := json.NewEncoder(conn)
- if err := encoder.Encode(resp); err != nil {
- log.Error().Err(err).Msg("Failed to encode RPC response")
- }
-}
-
-// sendError sends an error response
-func (p *Proxy) sendError(conn net.Conn, message string) {
- resp := RPCResponse{
- Success: false,
- Error: message,
- }
- encoder := json.NewEncoder(conn)
- encoder.Encode(resp)
-}
-
-// handleGetStatus returns proxy status
-func (p *Proxy) handleGetStatus(req RPCRequest) RPCResponse {
- pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
- pubKey, err := os.ReadFile(pubKeyPath)
- if err != nil {
- return RPCResponse{
- Success: false,
- Error: fmt.Sprintf("failed to read public key: %v", err),
- }
- }
-
- return RPCResponse{
- Success: true,
- Data: map[string]interface{}{
- "version": Version,
- "public_key": string(pubKey),
- "ssh_dir": p.sshKeyPath,
- },
- }
-}
-
-// ensureSSHKeypair generates SSH keypair if it doesn't exist
-func (p *Proxy) ensureSSHKeypair() error {
- privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
- pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
-
- // Check if keypair already exists
- if _, err := os.Stat(privKeyPath); err == nil {
- if _, err := os.Stat(pubKeyPath); err == nil {
- log.Info().Msg("SSH keypair already exists")
- return nil
- }
- }
-
- log.Info().Msg("Generating new SSH keypair")
-
- // Generate ed25519 keypair using ssh-keygen
- cmd := fmt.Sprintf("ssh-keygen -t ed25519 -f %s -N '' -C 'pulse-temp-proxy'", privKeyPath)
- if output, err := execCommand(cmd); err != nil {
- return fmt.Errorf("failed to generate SSH keypair: %w (output: %s)", err, output)
- }
-
- log.Info().Str("path", privKeyPath).Msg("SSH keypair generated")
- return nil
-}
-
-// handleEnsureClusterKeys discovers cluster nodes and pushes SSH keys
-func (p *Proxy) handleEnsureClusterKeys(req RPCRequest) RPCResponse {
- // Check if we're on a Proxmox host
- if !isProxmoxHost() {
- return RPCResponse{
- Success: false,
- Error: "not running on Proxmox host - cannot discover cluster",
- }
- }
-
- // Discover cluster nodes
- nodes, err := discoverClusterNodes()
- if err != nil {
- return RPCResponse{
- Success: false,
- Error: fmt.Sprintf("failed to discover cluster: %v", err),
- }
- }
-
- log.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes")
-
- // Push SSH key to each node
- results := make(map[string]interface{})
- successCount := 0
- for _, node := range nodes {
- log.Info().Str("node", node).Msg("Pushing SSH key to node")
- if err := p.pushSSHKey(node); err != nil {
- log.Error().Err(err).Str("node", node).Msg("Failed to push SSH key")
- results[node] = map[string]interface{}{
- "success": false,
- "error": err.Error(),
- }
- } else {
- log.Info().Str("node", node).Msg("SSH key pushed successfully")
- results[node] = map[string]interface{}{
- "success": true,
- }
- successCount++
- }
- }
-
- return RPCResponse{
- Success: true,
- Data: map[string]interface{}{
- "nodes": nodes,
- "results": results,
- "success_count": successCount,
- "total_count": len(nodes),
- },
- }
-}
-
-// handleRegisterNodes returns discovered nodes
-func (p *Proxy) handleRegisterNodes(req RPCRequest) RPCResponse {
- // Check if we're on a Proxmox host
- if !isProxmoxHost() {
- return RPCResponse{
- Success: false,
- Error: "not running on Proxmox host",
- }
- }
-
- // Discover cluster nodes
- nodes, err := discoverClusterNodes()
- if err != nil {
- return RPCResponse{
- Success: false,
- Error: fmt.Sprintf("failed to discover nodes: %v", err),
- }
- }
-
- // Test SSH connectivity to each node
- nodeStatus := make([]map[string]interface{}, 0, len(nodes))
- for _, node := range nodes {
- status := map[string]interface{}{
- "name": node,
- }
-
- if err := p.testSSHConnection(node); err != nil {
- status["ssh_ready"] = false
- status["error"] = err.Error()
- } else {
- status["ssh_ready"] = true
- }
-
- nodeStatus = append(nodeStatus, status)
- }
-
- return RPCResponse{
- Success: true,
- Data: map[string]interface{}{
- "nodes": nodeStatus,
- },
- }
-}
-
-// handleGetTemperature fetches temperature data from a node via SSH
-func (p *Proxy) handleGetTemperature(req RPCRequest) RPCResponse {
- // Extract node parameter
- nodeParam, ok := req.Params["node"]
- if !ok {
- return RPCResponse{
- Success: false,
- Error: "missing 'node' parameter",
- }
- }
-
- node, ok := nodeParam.(string)
- if !ok {
- return RPCResponse{
- Success: false,
- Error: "'node' parameter must be a string",
- }
- }
-
- // Fetch temperature data
- tempData, err := p.getTemperatureViaSSH(node)
- if err != nil {
- return RPCResponse{
- Success: false,
- Error: fmt.Sprintf("failed to get temperatures: %v", err),
- }
- }
-
- return RPCResponse{
- Success: true,
- Data: map[string]interface{}{
- "node": node,
- "temperature": tempData,
- },
- }
-}
diff --git a/cmd/pulse-temp-proxy/ssh.go b/cmd/pulse-temp-proxy/ssh.go
deleted file mode 100644
index 23a9d7af8..000000000
--- a/cmd/pulse-temp-proxy/ssh.go
+++ /dev/null
@@ -1,161 +0,0 @@
-package main
-
-import (
- "bytes"
- "fmt"
- "os"
- "os/exec"
- "path/filepath"
- "strings"
-)
-
-// execCommand executes a shell command and returns output
-func execCommand(cmd string) (string, error) {
- out, err := exec.Command("sh", "-c", cmd).CombinedOutput()
- return string(out), err
-}
-
-// getPublicKey reads the SSH public key
-func (p *Proxy) getPublicKey() (string, error) {
- pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
- data, err := os.ReadFile(pubKeyPath)
- if err != nil {
- return "", err
- }
- return strings.TrimSpace(string(data)), nil
-}
-
-// pushSSHKey adds the proxy's public key to a node's authorized_keys with restrictions
-func (p *Proxy) pushSSHKey(nodeHost string) error {
- pubKey, err := p.getPublicKey()
- if err != nil {
- return fmt.Errorf("failed to get public key: %w", err)
- }
-
- // Create forced command entry with restrictions
- // This limits the key to only running "sensors -j"
- authorizedKey := fmt.Sprintf(`command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty %s`, pubKey)
-
- // Build SSH command to add key to remote node
- // First, check if key already exists to avoid duplicates
- checkCmd := fmt.Sprintf(
- `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "grep -F '%s' /root/.ssh/authorized_keys 2>/dev/null"`,
- nodeHost,
- pubKey,
- )
-
- if output, _ := execCommand(checkCmd); strings.Contains(output, pubKey) {
- return nil // Key already exists
- }
-
- // Add the key
- addCmd := fmt.Sprintf(
- `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "mkdir -p /root/.ssh && chmod 700 /root/.ssh && echo '%s' >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"`,
- nodeHost,
- authorizedKey,
- )
-
- if _, err := execCommand(addCmd); err != nil {
- return fmt.Errorf("failed to add SSH key to %s: %w", nodeHost, err)
- }
-
- return nil
-}
-
-// testSSHConnection verifies SSH connectivity to a node
-func (p *Proxy) testSSHConnection(nodeHost string) error {
- privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
- cmd := fmt.Sprintf(
- `ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s "echo test"`,
- privKeyPath,
- nodeHost,
- )
-
- output, err := execCommand(cmd)
- if err != nil {
- return fmt.Errorf("SSH test failed: %w (output: %s)", err, output)
- }
-
- // The forced command will run "sensors -j" instead of "echo test"
- // So we should get JSON output, not "test"
- // For now, just check that connection succeeded
- return nil
-}
-
-// getTemperatureViaSSH fetches temperature data from a node
-func (p *Proxy) getTemperatureViaSSH(nodeHost string) (string, error) {
- privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
-
- // Since we use ForceCommand="sensors -j", any SSH command will run sensors
- // We don't need to specify the command
- cmd := fmt.Sprintf(
- `ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s ""`,
- privKeyPath,
- nodeHost,
- )
-
- output, err := execCommand(cmd)
- if err != nil {
- return "", fmt.Errorf("failed to fetch temperatures: %w", err)
- }
-
- return output, nil
-}
-
-// discoverClusterNodes discovers all nodes in the Proxmox cluster
-func discoverClusterNodes() ([]string, error) {
- // Check if pvecm is available (only on Proxmox hosts)
- if _, err := exec.LookPath("pvecm"); err != nil {
- return nil, fmt.Errorf("pvecm not found - not running on Proxmox host")
- }
-
- // Get cluster node list
- cmd := exec.Command("pvecm", "nodes")
- var out bytes.Buffer
- cmd.Stdout = &out
- if err := cmd.Run(); err != nil {
- return nil, fmt.Errorf("failed to get cluster nodes: %w", err)
- }
-
- // Parse output
- // Format:
- // Membership information
- // ----------------------
- // Nodeid Votes Name
- // 1 1 node1
- // 2 1 node2
-
- var nodes []string
- lines := strings.Split(out.String(), "\n")
- for _, line := range lines {
- fields := strings.Fields(line)
- // Skip header lines and empty lines
- if len(fields) < 3 {
- continue
- }
- // Check if first field is numeric (node ID)
- if fields[0][0] >= '0' && fields[0][0] <= '9' {
- nodeName := fields[2]
- nodes = append(nodes, nodeName)
- }
- }
-
- if len(nodes) == 0 {
- return nil, fmt.Errorf("no cluster nodes found")
- }
-
- return nodes, nil
-}
-
-// isProxmoxHost checks if we're running on a Proxmox host
-func isProxmoxHost() bool {
- // Check for pvecm command
- if _, err := exec.LookPath("pvecm"); err == nil {
- return true
- }
- // Check for /etc/pve directory
- if info, err := os.Stat("/etc/pve"); err == nil && info.IsDir() {
- return true
- }
- return false
-}
diff --git a/docs/PULSE_SENSOR_PROXY_HARDENING.md b/docs/PULSE_SENSOR_PROXY_HARDENING.md
new file mode 100644
index 000000000..9e62d8ba5
--- /dev/null
+++ b/docs/PULSE_SENSOR_PROXY_HARDENING.md
@@ -0,0 +1,962 @@
+# Pulse Temperature Proxy - Security Hardening Guide
+
+## Overview
+
+The `pulse-sensor-proxy` is a host-side service that provides secure temperature monitoring for containerized Pulse deployments. It addresses a critical security concern: SSH keys stored inside LXC containers can be exfiltrated if the container is compromised.
+
+**Architecture:**
+- Host-side proxy runs with minimal privileges on each Proxmox node
+- Containerized Pulse communicates via Unix socket (`/run/pulse-sensor-proxy/pulse-sensor-proxy.sock`)
+- Proxy authenticates containers using Linux `SO_PEERCRED` (UID/PID verification)
+- SSH keys never leave the host filesystem
+
+**Threat Model:**
+- ✅ Container compromise cannot access SSH keys
+- ✅ Container cannot directly SSH to cluster nodes
+- ✅ Rate limiting prevents abuse via socket
+- ✅ IP restrictions on SSH keys limit lateral movement
+- ✅ Audit logging tracks all temperature requests
+
+## Prerequisites
+
+- Proxmox VE 7.0+ or Proxmox Backup Server 2.0+
+- LXC container running Pulse (unprivileged recommended)
+- Root access to Proxmox host(s)
+- `lm-sensors` installed on all nodes
+- Cluster SSH access configured (root passwordless SSH between nodes)
+
+## Host Hardening
+
+### Service Account
+
+The proxy runs as the `pulse-sensor-proxy` user with these characteristics:
+- System account (no login shell: `/usr/sbin/nologin`)
+- No home directory
+- Dedicated group: `pulse-sensor-proxy`
+- Owns `/var/lib/pulse-sensor-proxy` and `/run/pulse-sensor-proxy`
+
+**Verify service account:**
+```bash
+# Check user exists
+id pulse-sensor-proxy
+
+# Expected output:
+# uid=XXX(pulse-sensor-proxy) gid=XXX(pulse-sensor-proxy) groups=XXX(pulse-sensor-proxy)
+
+# Check shell (should be /usr/sbin/nologin)
+getent passwd pulse-sensor-proxy | cut -d: -f7
+```
+
+### Systemd Unit Security
+
+The systemd unit includes comprehensive hardening directives:
+
+**Key security features:**
+- `User=pulse-sensor-proxy` / `Group=pulse-sensor-proxy` - Unprivileged execution
+- `NoNewPrivileges=true` - Prevents privilege escalation
+- `ProtectSystem=strict` - Read-only `/usr`, `/boot`, `/efi`
+- `ProtectHome=true` - Inaccessible `/home`, `/root`, `/run/user`
+- `PrivateTmp=true` - Isolated `/tmp` and `/var/tmp`
+- `SystemCallFilter=@system-service` - Restricted syscalls
+- `CapabilityBoundingSet=` - No capabilities granted
+- `RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6` - Socket restrictions
+
+**Verify systemd security:**
+```bash
+# Check service status
+systemctl status pulse-sensor-proxy
+
+# Verify user/group
+ps aux | grep pulse-sensor-proxy | grep -v grep
+
+# Expected: pulse-sensor-proxy user, not root
+
+# Check systemd security settings
+systemctl show pulse-sensor-proxy | grep -E '(User=|NoNewPrivileges|ProtectSystem|CapabilityBoundingSet)'
+```
+
+### File Permissions
+
+**Critical paths and ownership:**
+```
+/var/lib/pulse-sensor-proxy/ pulse-sensor-proxy:pulse-sensor-proxy 0750
+├── ssh/ pulse-sensor-proxy:pulse-sensor-proxy 0700
+│ ├── id_ed25519 pulse-sensor-proxy:pulse-sensor-proxy 0600
+│ └── id_ed25519.pub pulse-sensor-proxy:pulse-sensor-proxy 0640
+└── ssh.d/ pulse-sensor-proxy:pulse-sensor-proxy 0750
+ ├── next/ pulse-sensor-proxy:pulse-sensor-proxy 0750
+ └── prev/ pulse-sensor-proxy:pulse-sensor-proxy 0750
+
+/run/pulse-sensor-proxy/ pulse-sensor-proxy:pulse-sensor-proxy 0775
+└── pulse-sensor-proxy.sock pulse-sensor-proxy:pulse-sensor-proxy 0777
+```
+
+**Verify permissions:**
+```bash
+# Check base directory
+ls -ld /var/lib/pulse-sensor-proxy/
+# Expected: drwxr-x--- pulse-sensor-proxy pulse-sensor-proxy
+
+# Check SSH keys
+ls -l /var/lib/pulse-sensor-proxy/ssh/
+# Expected:
+# -rw------- pulse-sensor-proxy pulse-sensor-proxy id_ed25519
+# -rw-r----- pulse-sensor-proxy pulse-sensor-proxy id_ed25519.pub
+
+# Check socket directory (note: 0775 for container access)
+ls -ld /run/pulse-sensor-proxy/
+# Expected: drwxrwxr-x pulse-sensor-proxy pulse-sensor-proxy
+```
+
+**Why 0775 on socket directory?**
+The socket directory needs `0775` (not `0770`) to allow the container's unprivileged UID (e.g., 1001) to traverse into the directory and access the socket. The socket itself is `0777` as access control is enforced via `SO_PEERCRED`.
+
+## LXC Container Requirements
+
+### Configuration Summary
+
+| Setting | Value | Purpose |
+|---------|-------|---------|
+| `lxc.idmap` | `u 0 100000 65536`<br>`g 0 100000 65536` | Unprivileged UID/GID mapping |
+| `lxc.apparmor.profile` | `generated` or custom | AppArmor confinement |
+| `lxc.cap.drop` | `sys_admin` (optional) | Drop dangerous capabilities |
+| `lxc.mount.entry` | Directory-level bind mount | Socket access from container |
+
+### Sample LXC Configuration
+
+**In `/etc/pve/lxc/<CTID>.conf`:**
+```ini
+# Unprivileged container (required)
+unprivileged: 1
+
+# AppArmor profile (recommended)
+lxc.apparmor.profile: generated
+
+# Drop CAP_SYS_ADMIN if feasible (optional but recommended)
+# WARNING: May break some container management operations
+lxc.cap.drop: sys_admin
+
+# Bind mount proxy socket directory (REQUIRED)
+# Note: Directory-level mount, not socket-level (socket is recreated by systemd)
+lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0
+```
+
+**Key points:**
+- **Directory-level mount**: Mount `/run/pulse-sensor-proxy` directory, not the socket file itself
+- **Why directory mount?** Systemd recreates the socket on restart; socket-level mounts break on recreation
+- **Mode 0775**: Socket directory needs group+other execute permissions for container UID traversal
+- **Socket 0777**: Actual socket is world-writable; security enforced via `SO_PEERCRED` authentication
+
+### Runtime Verification
+
+**Check container is unprivileged:**
+```bash
+# On host
+pct config <CTID> | grep unprivileged
+# Expected: unprivileged: 1
+
+# Inside container
+cat /proc/self/uid_map
+# Expected: 0 100000 65536 (or similar)
+# NOT: 0 0 4294967295 (privileged)
+```
+
+**Check AppArmor confinement:**
+```bash
+# Inside container
+cat /proc/self/attr/current
+# Expected: lxc-<CTID>_* (enforcing) or similar
+# NOT: unconfined
+```
+
+**Check namespace isolation:**
+```bash
+# Inside container
+ls -li /proc/self/ns/
+# Each namespace should have a unique inode number, different from host
+```
+
+**Check capabilities:**
+```bash
+# Inside container
+capsh --print | grep Current
+# Should show limited capability set
+# If lxc.cap.drop: sys_admin is set, CAP_SYS_ADMIN should be absent
+```
+
+**Check bind mount:**
+```bash
+# Inside container
+ls -la /run/pulse-sensor-proxy/
+# Expected: pulse-sensor-proxy.sock visible
+
+# Test socket access (requires Pulse to attempt connection)
+socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock
+# Should connect (may timeout waiting for input, but connection succeeds)
+```
+
+## Key Management
+
+### SSH Key Restrictions
+
+All SSH keys deployed to cluster nodes include these restrictions:
+- `command="sensors -j"` - Forced command (only sensors allowed)
+- `from=""` - IP address restrictions
+- `no-port-forwarding` - Disable port forwarding
+- `no-X11-forwarding` - Disable X11 forwarding
+- `no-agent-forwarding` - Disable agent forwarding
+- `no-pty` - Disable PTY allocation
+
+**Example authorized_keys entry:**
+```
+from="192.168.0.0/24,10.0.0.0/8",command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty ssh-ed25519 AAAA... pulse-sensor-proxy
+```
+
+**Configure allowed subnets:**
+
+Create `/etc/pulse-sensor-proxy/config.yaml`:
+```yaml
+allowed_source_subnets:
+ - "192.168.0.0/24" # LAN subnet
+ - "10.0.0.0/8" # VPN subnet
+```
+
+Or use environment variable:
+```bash
+# In /etc/default/pulse-sensor-proxy (loaded by systemd)
+PULSE_SENSOR_PROXY_ALLOWED_SUBNETS="192.168.0.0/24,10.0.0.0/8"
+```
+
+**Auto-detection:**
+If no subnets are configured, the proxy auto-detects host IP addresses and uses them as `/32` (IPv4) or `/128` (IPv6) CIDRs. This is secure but brittle (breaks if host IP changes). Explicit configuration is recommended.
+
+**Verify SSH restrictions:**
+```bash
+# On any cluster node
+grep pulse-sensor-proxy /root/.ssh/authorized_keys
+
+# Expected format:
+# from="...",command="sensors -j",no-* ssh-ed25519 AAAA... pulse-sensor-proxy
+```
+
+### Key Rotation
+
+**Rotation cadence:**
+- Recommended: Every 90 days
+- Minimum: Every 180 days
+- After incident: Immediately
+
+**Rotation workflow:**
+
+The `pulse-sensor-proxy-rotate-keys.sh` script performs staged rotation with verification:
+
+1. **Dry-run (recommended first):**
+ ```bash
+ /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run
+ ```
+ Shows what would happen without making changes.
+
+2. **Perform rotation:**
+ ```bash
+ /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
+ ```
+
+ **What happens:**
+ - Generates new Ed25519 keypair in `/var/lib/pulse-sensor-proxy/ssh.d/next/`
+ - Pushes new key to all cluster nodes (via RPC `ensure_cluster_keys`)
+ - Verifies SSH connectivity with new key on each node
+ - Atomically swaps keys:
+ - Current `/ssh/` → `/ssh.d/prev/` (backup)
+ - Staging `/ssh.d/next/` → `/ssh/` (active)
+ - Old keys preserved in `/ssh.d/prev/` for rollback
+
+3. **If rotation fails, rollback:**
+ ```bash
+ /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback
+ ```
+
+ Restores previous keypair from `/ssh.d/prev/` and re-pushes to cluster nodes.
+
+**Post-rotation verification:**
+```bash
+# Check new key timestamp
+stat /var/lib/pulse-sensor-proxy/ssh/id_ed25519
+
+# Verify all nodes have new key
+for node in pve1 pve2 pve3; do
+ echo "=== $node ==="
+ ssh root@$node "grep pulse-sensor-proxy /root/.ssh/authorized_keys | tail -1"
+done
+
+# Test temperature fetch via proxy
+curl -s --unix-socket /run/pulse-sensor-proxy/pulse-sensor-proxy.sock \
+ -d '{"correlation_id":"test","method":"get_temp","params":{"node":"pve1"}}' \
+ | jq .
+```
+
+### Automated Rotation (Optional)
+
+**Create systemd timer:**
+
+`/etc/systemd/system/pulse-sensor-proxy-key-rotation.service`:
+```ini
+[Unit]
+Description=Rotate pulse-sensor-proxy SSH keys
+After=pulse-sensor-proxy.service
+Requires=pulse-sensor-proxy.service
+
+[Service]
+Type=oneshot
+ExecStart=/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
+StandardOutput=journal
+StandardError=journal
+```
+
+`/etc/systemd/system/pulse-sensor-proxy-key-rotation.timer`:
+```ini
+[Unit]
+Description=Rotate pulse-sensor-proxy SSH keys every 90 days
+Requires=pulse-sensor-proxy-key-rotation.service
+
+[Timer]
+OnCalendar=quarterly
+RandomizedDelaySec=1h
+Persistent=true
+
+[Install]
+WantedBy=timers.target
+```
+
+**Enable timer:**
+```bash
+systemctl daemon-reload
+systemctl enable --now pulse-sensor-proxy-key-rotation.timer
+
+# Check next run
+systemctl list-timers pulse-sensor-proxy-key-rotation.timer
+```
+
+## Monitoring & Auditing
+
+### Metrics Endpoint
+
+The proxy exposes Prometheus metrics on `127.0.0.1:9127` by default.
+
+**Available metrics:**
+- `pulse_proxy_rpc_requests_total{method, result}` - RPC request counter
+- `pulse_proxy_rpc_latency_seconds{method}` - RPC handler latency histogram
+- `pulse_proxy_ssh_requests_total{node, result}` - SSH request counter per node
+- `pulse_proxy_ssh_latency_seconds{node}` - SSH latency histogram per node
+- `pulse_proxy_queue_depth` - Concurrent RPC requests (gauge)
+- `pulse_proxy_rate_limit_hits_total` - Rejected requests due to rate limiting
+- `pulse_proxy_build_info{version}` - Build metadata
+
+**Configure metrics address:**
+
+In `/etc/default/pulse-sensor-proxy`:
+```bash
+# Listen on all interfaces (WARNING: exposes metrics externally)
+PULSE_SENSOR_PROXY_METRICS_ADDR="0.0.0.0:9127"
+
+# Disable metrics
+PULSE_SENSOR_PROXY_METRICS_ADDR="disabled"
+```
+
+**Test metrics endpoint:**
+```bash
+curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy
+```
+
+### Prometheus Integration
+
+**Sample scrape configuration:**
+
+```yaml
+scrape_configs:
+ - job_name: 'pulse-sensor-proxy'
+ static_configs:
+ - targets:
+ - 'pve1:9127'
+ - 'pve2:9127'
+ - 'pve3:9127'
+ relabel_configs:
+ - source_labels: [__address__]
+ regex: '([^:]+):.+'
+ target_label: instance
+```
+
+### Alert Rules
+
+**Recommended Prometheus alerts:**
+
+```yaml
+groups:
+ - name: pulse-sensor-proxy
+ rules:
+ # High SSH failure rate
+ - alert: PulseProxySSHFailureRate
+ expr: |
+ rate(pulse_proxy_ssh_requests_total{result="error"}[5m]) > 0.1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "High SSH failure rate on {{ $labels.instance }}"
+ description: "{{ $value | humanize }} SSH requests/sec failing"
+
+ # Rate limiting active
+ - alert: PulseProxyRateLimiting
+ expr: |
+ rate(pulse_proxy_rate_limit_hits_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Rate limiting active on {{ $labels.instance }}"
+ description: "Proxy rejecting requests due to rate limits"
+
+ # High queue depth
+ - alert: PulseProxyQueueDepth
+ expr: pulse_proxy_queue_depth > 5
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "High RPC queue depth on {{ $labels.instance }}"
+ description: "{{ $value }} concurrent requests (threshold: 5)"
+
+ # Proxy down
+ - alert: PulseProxyDown
+ expr: up{job="pulse-sensor-proxy"} == 0
+ for: 2m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Pulse proxy down on {{ $labels.instance }}"
+```
+
+### Audit Logging
+
+**Log format:**
+All RPC requests are logged with structured fields:
+- `corr_id` - Correlation ID (UUID, tracks request lifecycle)
+- `uid` / `pid` - Peer credentials from `SO_PEERCRED`
+- `method` - RPC method called (`get_temp`, `register_nodes`, `ensure_cluster_keys`)
+
+**Example log entries:**
+```json
+{"level":"info","corr_id":"a7f3d..","uid":1001,"pid":12345,"method":"get_temp","node":"pve1","msg":"RPC request"}
+{"level":"info","corr_id":"a7f3d..","node":"pve1","latency_ms":245,"msg":"Temperature fetch successful"}
+```
+
+**Query logs:**
+```bash
+# All RPC requests in last hour
+journalctl -u pulse-sensor-proxy --since "1 hour ago" -o json | \
+ jq -r 'select(.corr_id != null) | [.corr_id, .uid, .method, .node] | @tsv'
+
+# Failed SSH requests
+journalctl -u pulse-sensor-proxy --since today | grep -E '(SSH.*failed|error)'
+
+# Rate limit hits
+journalctl -u pulse-sensor-proxy --since today | grep "rate limit"
+
+# Specific correlation ID
+journalctl -u pulse-sensor-proxy | grep "corr_id=a7f3d"
+```
+
+### Rate Limiting
+
+**Current limits (per peer UID+PID):**
+- **Rate**: 20 requests/minute (token bucket with burst)
+- **Burst**: 10 requests
+- **Concurrency**: 10 simultaneous requests
+
+**Behavior on limit exceeded:**
+- Request rejected immediately (no queuing)
+- `pulse_proxy_rate_limit_hits_total` metric incremented
+- Log entry: `"Rate limit exceeded"`
+- HTTP-like semantics: Similar to 429 Too Many Requests
+
+**Adjust limits:**
+
+Limits are hardcoded in `throttle.go`. To adjust, modify and rebuild:
+```go
+// cmd/pulse-sensor-proxy/throttle.go
+const (
+ requestsPerMin = 20 // Change this
+ requestBurst = 10 // Change this
+ maxConcurrent = 10 // Change this
+)
+```
+
+Then rebuild and restart:
+```bash
+go build -v ./cmd/pulse-sensor-proxy
+systemctl restart pulse-sensor-proxy
+```
+
+## Incident Response
+
+### Suspected Compromise Checklist
+
+**If the proxy or host is suspected compromised:**
+
+1. **Isolate immediately:**
+ ```bash
+ # Stop proxy service
+ systemctl stop pulse-sensor-proxy
+
+ # Block outbound SSH from host (if applicable)
+ iptables -A OUTPUT -p tcp --dport 22 -j REJECT
+ ```
+
+2. **Rotate all keys:**
+ ```bash
+ # Remove compromised keys from all nodes
+ for node in pve1 pve2 pve3; do
+ ssh root@$node "sed -i '/pulse-sensor-proxy/d' /root/.ssh/authorized_keys"
+ done
+
+ # Generate new keys (don't use rotation script - may be compromised)
+ rm -rf /var/lib/pulse-sensor-proxy/ssh*
+ mkdir -p /var/lib/pulse-sensor-proxy/ssh
+ ssh-keygen -t ed25519 -N '' -C "pulse-sensor-proxy emergency $(date -u +%Y%m%dT%H%M%SZ)" \
+ -f /var/lib/pulse-sensor-proxy/ssh/id_ed25519
+ chown -R pulse-sensor-proxy:pulse-sensor-proxy /var/lib/pulse-sensor-proxy/ssh
+ chmod 0700 /var/lib/pulse-sensor-proxy/ssh
+ chmod 0600 /var/lib/pulse-sensor-proxy/ssh/id_ed25519
+ chmod 0640 /var/lib/pulse-sensor-proxy/ssh/id_ed25519.pub
+ ```
+
+3. **Audit logs:**
+ ```bash
+ # Export all proxy logs
+ journalctl -u pulse-sensor-proxy --since "7 days ago" > /tmp/proxy-audit-$(date +%s).log
+
+ # Look for anomalies:
+ # - Unusual correlation IDs
+ # - High rate limit hits
+ # - Unexpected UIDs/PIDs
+ # - SSH errors to unexpected nodes
+ ```
+
+4. **Reinstall proxy:**
+ ```bash
+ # Re-run installation script
+ /opt/pulse/scripts/install-temp-proxy.sh
+
+ # Verify service status
+ systemctl status pulse-sensor-proxy
+ ```
+
+5. **Re-push keys:**
+ ```bash
+ # Use proxy RPC to push new keys
+ /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
+ ```
+
+6. **Verify no persistence mechanisms:**
+ ```bash
+ # Check for unexpected systemd units
+ systemctl list-units --all | grep -i proxy
+
+ # Check for unexpected cron jobs
+ crontab -l -u pulse-sensor-proxy
+
+ # Check for unauthorized files in /var/lib/pulse-sensor-proxy
+ find /var/lib/pulse-sensor-proxy -type f ! -path '*/ssh/*' ! -path '*/ssh.d/*'
+ ```
+
+### Post-Incident Hardening
+
+After an incident, consider:
+- **Audit all LXC containers** for unexpected privilege escalation
+- **Review bind mounts** on all containers (check for unauthorized mounts)
+- **Enable full syscall auditing** (`auditd`) on host
+- **Restrict network access** to proxy metrics endpoint (firewall `127.0.0.1:9127`)
+- **Implement log aggregation** (forward `journald` to central SIEM)
+
+## Testing & Rollout
+
+### Development Testing
+
+Before deploying to production, verify the implementation with these safe tests:
+
+**1. Build Verification:**
+```bash
+# Compile proxy
+cd /opt/pulse
+go build -v ./cmd/pulse-sensor-proxy
+
+# Verify binary
+./pulse-sensor-proxy version
+# Expected: pulse-sensor-proxy dev (or version number)
+
+# Check help output
+./pulse-sensor-proxy --help
+```
+
+**2. Rotation Script Syntax:**
+```bash
+# Syntax check
+bash -n /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
+
+# Help output
+/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --help
+
+# Dry-run (requires root and socket)
+sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run
+```
+
+**3. Configuration Validation:**
+```bash
+# Test config file parsing
+cat > /tmp/test-config.yaml <<'EOF'
+allowed_source_subnets:
+  - "192.168.0.0/24"
+EOF
+```
+
+### Production Rollout
+
+**Phase 1: Preparation**
+
+1. **Backup current state:**
+   ```bash
+   systemctl status pulse-sensor-proxy > /tmp/pulse-sensor-proxy-status-before.txt
+   ```
+
+2. **Create service account:**
+ ```bash
+ # Run install script or manually create
+ if ! id -u pulse-sensor-proxy >/dev/null 2>&1; then
+ useradd --system --user-group --no-create-home --shell /usr/sbin/nologin pulse-sensor-proxy
+ fi
+ ```
+
+3. **Update file ownership:**
+ ```bash
+ chown -R pulse-sensor-proxy:pulse-sensor-proxy /var/lib/pulse-sensor-proxy/
+ chmod 0750 /var/lib/pulse-sensor-proxy/
+ chmod 0700 /var/lib/pulse-sensor-proxy/ssh/
+ chmod 0600 /var/lib/pulse-sensor-proxy/ssh/id_ed25519
+ chmod 0640 /var/lib/pulse-sensor-proxy/ssh/id_ed25519.pub
+ ```
+
+**Phase 2: Deploy Hardened Version**
+
+1. **Build and install binary:**
+ ```bash
+ cd /opt/pulse
+ go build -v -o /tmp/pulse-sensor-proxy ./cmd/pulse-sensor-proxy
+
+ # Verify build
+ /tmp/pulse-sensor-proxy version
+
+ # Install
+ sudo install -m 0755 -o root -g root /tmp/pulse-sensor-proxy /usr/local/bin/pulse-sensor-proxy
+ ```
+
+2. **Install hardened systemd unit:**
+ ```bash
+ # Copy hardened unit
+ sudo cp /opt/pulse/scripts/pulse-sensor-proxy.service /etc/systemd/system/
+
+ # Verify syntax
+ systemd-analyze verify /etc/systemd/system/pulse-sensor-proxy.service
+
+ # Reload systemd
+ sudo systemctl daemon-reload
+ ```
+
+3. **Update RuntimeDirectoryMode for LXC access:**
+ ```bash
+ # Ensure socket directory is accessible from container
+ sudo mkdir -p /etc/systemd/system/pulse-sensor-proxy.service.d/
+ cat | sudo tee /etc/systemd/system/pulse-sensor-proxy.service.d/lxc-access.conf <<'EOF'
+[Service]
+RuntimeDirectoryMode=0775
+EOF
+
+ sudo systemctl daemon-reload
+ ```
+
+**Phase 3: Restart and Verify**
+
+1. **Restart service:**
+ ```bash
+ sudo systemctl restart pulse-sensor-proxy
+
+ # Check status
+ sudo systemctl status pulse-sensor-proxy
+ ```
+
+2. **Verify service user:**
+ ```bash
+ ps aux | grep pulse-sensor-proxy | grep -v grep
+ # Expected: pulse-sensor-proxy user, not root
+ ```
+
+3. **Check socket permissions:**
+ ```bash
+ ls -ld /run/pulse-sensor-proxy/
+ # Expected: drwxrwxr-x pulse-sensor-proxy pulse-sensor-proxy
+
+ ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock
+ # Expected: srwxrwxrwx pulse-sensor-proxy pulse-sensor-proxy
+ ```
+
+4. **Test from container:**
+ ```bash
+ # Inside LXC container running Pulse
+ ls -la /run/pulse-sensor-proxy/
+ # Should show socket
+
+ # Check Pulse logs for connection success
+ journalctl -u pulse-backend -n 50 | grep -i temperature
+ ```
+
+**Phase 4: End-to-End Validation**
+
+1. **Test RPC methods:**
+ ```bash
+ # On host, test socket connectivity
+ echo '{"correlation_id":"test-001","method":"register_nodes","params":{}}' | \
+ sudo socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock | jq .
+
+ # Should return cluster nodes list
+ ```
+
+2. **Test temperature fetch:**
+ ```bash
+ # From container or via socket
+ echo '{"correlation_id":"test-002","method":"get_temp","params":{"node":"pve1"}}' | \
+ socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock | jq .
+
+ # Should return sensors JSON data
+ ```
+
+3. **Verify metrics endpoint:**
+ ```bash
+ curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy
+
+ # Should show metrics like:
+ # pulse_proxy_rpc_requests_total{method="get_temp",result="success"} N
+ # pulse_proxy_queue_depth 0
+ ```
+
+4. **Test SSH key rotation:**
+ ```bash
+ # Dry-run first
+ sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run
+
+ # Full rotation (if confident)
+ sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
+
+ # Verify all nodes updated
+ for node in pve1 pve2 pve3; do
+ ssh root@$node "tail -1 /root/.ssh/authorized_keys"
+ done
+ ```
+
+5. **Audit logging verification:**
+ ```bash
+ # Check logs include correlation IDs and peer credentials
+ sudo journalctl -u pulse-sensor-proxy --since "5 minutes ago" -o json | \
+ jq -r 'select(.corr_id != null) | [.corr_id, .uid, .method] | @tsv'
+
+ # Should show structured logging with UIDs
+ ```
+
+**Phase 5: Monitoring Setup**
+
+1. **Configure Prometheus scraping:**
+ ```yaml
+ # Add to prometheus.yml
+ scrape_configs:
+ - job_name: 'pulse-sensor-proxy'
+ static_configs:
+ - targets: ['localhost:9127']
+ ```
+
+2. **Import alert rules:**
+ ```bash
+ # Copy alert rules from docs to Prometheus alerts directory
+ # Reload Prometheus configuration
+ ```
+
+3. **Verify alerts fire (optional stress test):**
+ ```bash
+ # Generate rate limit hits (test alert)
+ for i in {1..50}; do
+ echo '{"correlation_id":"stress-'$i'","method":"register_nodes","params":{}}' | \
+ socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock &
+ done
+ wait
+
+ # Check rate limit metric increased
+ curl -s http://127.0.0.1:9127/metrics | grep rate_limit_hits
+ ```
+
+### Rollback Procedure
+
+If issues occur during rollout:
+
+1. **Stop new service:**
+ ```bash
+ sudo systemctl stop pulse-sensor-proxy
+ ```
+
+2. **Restore backup:**
+ ```bash
+ sudo cp /etc/systemd/system/pulse-sensor-proxy.service.backup \
+ /etc/systemd/system/pulse-sensor-proxy.service
+ sudo systemctl daemon-reload
+ ```
+
+3. **Restore SSH keys (if rotated):**
+ ```bash
+ # If rotation was performed and failed
+ sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback
+ ```
+
+4. **Restart with old configuration:**
+ ```bash
+ sudo systemctl restart pulse-sensor-proxy
+ sudo systemctl status pulse-sensor-proxy
+ ```
+
+5. **Verify Pulse connectivity:**
+ ```bash
+ # Check Pulse can still fetch temperatures
+ # Monitor Pulse logs
+ ```
+
+### Known Limitations
+
+- **No automated unit tests**: Code verification relies on build success and manual testing
+- **Key rotation requires manual trigger**: Automated timer setup is optional
+- **Metrics require Prometheus**: No built-in alerting without external monitoring
+- **LXC bind mount required**: Container must have directory-level bind mount configured
+- **Root required for rotation script**: Script needs root to run `ensure_cluster_keys` RPC
+
+### Future Improvements
+
+- Add Go unit tests for validation, throttling, and metrics logic
+- Implement health check endpoint (e.g., `/health`) separate from metrics
+- Add support for TLS on metrics endpoint
+- Create automated integration test suite
+- Add `--check` flag to rotation script for pre-flight validation
+- Support for multiple LXC containers accessing same proxy instance
+
+## Appendix
+
+### Quick Verification Checklist
+
+**Host:**
+- [ ] Service running as `pulse-sensor-proxy` user (not root)
+- [ ] Keys in `/var/lib/pulse-sensor-proxy/ssh/` owned by `pulse-sensor-proxy:pulse-sensor-proxy`
+- [ ] Private key permissions: `0600`
+- [ ] Socket directory permissions: `0775` (not `0770`)
+- [ ] Metrics endpoint accessible: `curl http://127.0.0.1:9127/metrics`
+
+**Container:**
+- [ ] Container is unprivileged (`unprivileged: 1` in config)
+- [ ] Bind mount exists: `ls /run/pulse-sensor-proxy/pulse-sensor-proxy.sock`
+- [ ] AppArmor enforced: `cat /proc/self/attr/current` shows confinement
+- [ ] Pulse can connect to socket (check Pulse logs)
+
+**SSH Keys:**
+- [ ] All nodes have `pulse-sensor-proxy` key in `/root/.ssh/authorized_keys`
+- [ ] Keys include `from="..."` restrictions
+- [ ] Keys include `command="sensors -j"` forced command
+- [ ] Keys include `no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty`
+
+**Monitoring:**
+- [ ] Prometheus scraping metrics successfully
+- [ ] Alerts configured for SSH failures, rate limiting, queue depth
+- [ ] Logs forwarded to central logging (optional but recommended)
+
+### Reference Commands
+
+**Service Management:**
+```bash
+systemctl status pulse-sensor-proxy # Check service status
+systemctl restart pulse-sensor-proxy # Restart service
+journalctl -u pulse-sensor-proxy -f # Tail logs
+```
+
+**Key Management:**
+```bash
+/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run # Dry-run rotation
+/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh # Perform rotation
+/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback # Rollback
+```
+
+**Metrics:**
+```bash
+curl http://127.0.0.1:9127/metrics # Fetch all metrics
+curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy # Filter proxy metrics
+```
+
+**Manual RPC (Testing):**
+```bash
+# Using socat (inline JSON)
+echo '{"correlation_id":"test","method":"get_temp","params":{"node":"pve1"}}' | \
+ socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock
+
+# Using Python (proper JSON-RPC client)
+python3 <<'PY'
+import json, socket, uuid
+payload = {
+ "correlation_id": str(uuid.uuid4()),
+ "method": "get_temp",
+ "params": {"node": "pve1"}
+}
+with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
+ s.connect("/run/pulse-sensor-proxy/pulse-sensor-proxy.sock")
+ s.sendall((json.dumps(payload) + "\n").encode())
+ s.shutdown(socket.SHUT_WR)
+ print(s.recv(65536).decode())
+PY
+```
+
+**Verification:**
+```bash
+# Check service user
+ps aux | grep pulse-sensor-proxy | grep -v grep
+
+# Check file ownership
+ls -lR /var/lib/pulse-sensor-proxy/
+
+# Check bind mount in container
+pct enter <CTID>
+ls -la /run/pulse-sensor-proxy/
+
+# Check SSH keys on nodes
+for node in pve1 pve2 pve3; do
+ echo "=== $node ==="
+ ssh root@$node "grep pulse-sensor-proxy /root/.ssh/authorized_keys"
+done
+```
+
+---
+
+**Document Version:** 1.0
+**Last Updated:** 2025-10-13
+**Applies To:** pulse-sensor-proxy v1.0+
diff --git a/docs/TEMPERATURE_MONITORING.md b/docs/TEMPERATURE_MONITORING.md
index 512a63886..91e5139ed 100644
--- a/docs/TEMPERATURE_MONITORING.md
+++ b/docs/TEMPERATURE_MONITORING.md
@@ -18,8 +18,8 @@ Pulse can display real-time CPU and NVMe temperatures directly in your dashboard
For **containerized deployments** (LXC/Docker), Pulse uses a secure proxy architecture:
-1. **pulse-temp-proxy** runs on the Proxmox host (outside the container)
-2. SSH keys are stored on the host filesystem (`/var/lib/pulse-temp-proxy/ssh/`)
+1. **pulse-sensor-proxy** runs on the Proxmox host (outside the container)
+2. SSH keys are stored on the host filesystem (`/var/lib/pulse-sensor-proxy/ssh/`)
3. Pulse communicates with the proxy via unix socket
4. The proxy handles all SSH connections to cluster nodes
@@ -202,7 +202,7 @@ You can still manage the entry manually if you prefer, but no extra steps are re
### Secure Proxy Architecture (Current)
-As of v4.24.0, containerized deployments use **pulse-temp-proxy** which eliminates the security concerns:
+As of v4.24.0, containerized deployments use **pulse-sensor-proxy** which eliminates the security concerns:
- **SSH keys stored on host** - Not accessible from container
- **Unix socket communication** - Pulse never touches SSH keys
@@ -294,13 +294,13 @@ To check if your deployment is using the secure proxy:
```bash
# On Proxmox host - check proxy service
-systemctl status pulse-temp-proxy
+systemctl status pulse-sensor-proxy
# Check if socket exists
-ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock
+ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock
# View proxy logs
-journalctl -u pulse-temp-proxy -f
+journalctl -u pulse-sensor-proxy -f
```
In the Pulse container, check the logs at startup:
@@ -327,27 +327,27 @@ Temperature data will stop appearing in the dashboard after the next polling cyc
### Managing the Proxy Service
-The pulse-temp-proxy service runs on the Proxmox host (outside the container).
+The pulse-sensor-proxy service runs on the Proxmox host (outside the container).
**Service Management:**
```bash
# Check service status
-systemctl status pulse-temp-proxy
+systemctl status pulse-sensor-proxy
# Restart the proxy
-systemctl restart pulse-temp-proxy
+systemctl restart pulse-sensor-proxy
# Stop the proxy (disables temperature monitoring)
-systemctl stop pulse-temp-proxy
+systemctl stop pulse-sensor-proxy
# Start the proxy
-systemctl start pulse-temp-proxy
+systemctl start pulse-sensor-proxy
# Enable proxy to start on boot
-systemctl enable pulse-temp-proxy
+systemctl enable pulse-sensor-proxy
# Disable proxy autostart
-systemctl disable pulse-temp-proxy
+systemctl disable pulse-sensor-proxy
```
### Log Locations
@@ -355,16 +355,16 @@ systemctl disable pulse-temp-proxy
**Proxy Logs (on Proxmox host):**
```bash
# Follow proxy logs in real-time
-journalctl -u pulse-temp-proxy -f
+journalctl -u pulse-sensor-proxy -f
# View last 50 lines
-journalctl -u pulse-temp-proxy -n 50
+journalctl -u pulse-sensor-proxy -n 50
# View logs since last boot
-journalctl -u pulse-temp-proxy -b
+journalctl -u pulse-sensor-proxy -b
# View logs with timestamps
-journalctl -u pulse-temp-proxy --since "1 hour ago"
+journalctl -u pulse-sensor-proxy --since "1 hour ago"
```
**Pulse Logs (in container):**
@@ -381,12 +381,12 @@ Rotate SSH keys periodically for security (recommended every 90 days):
```bash
# 1. On Proxmox host, backup old keys
-cd /var/lib/pulse-temp-proxy/ssh/
+cd /var/lib/pulse-sensor-proxy/ssh/
cp id_ed25519 id_ed25519.backup
cp id_ed25519.pub id_ed25519.pub.backup
# 2. Generate new keypair
-ssh-keygen -t ed25519 -f id_ed25519 -N "" -C "pulse-temp-proxy-rotated"
+ssh-keygen -t ed25519 -f id_ed25519 -N "" -C "pulse-sensor-proxy-rotated"
# 3. Get the new public key
cat id_ed25519.pub
@@ -398,12 +398,12 @@ ssh root@node2 "echo 'NEW_PUBLIC_KEY_HERE' >> /root/.ssh/authorized_keys"
# ... repeat for all nodes
# 5. Restart proxy to use new keys
-systemctl restart pulse-temp-proxy
+systemctl restart pulse-sensor-proxy
# 6. Verify temperature data still works in Pulse UI
# 7. Remove old keys from nodes (after confirming new keys work)
-ssh root@node1 "sed -i '/pulse-temp-proxy-old/d' /root/.ssh/authorized_keys"
+ssh root@node1 "sed -i '/pulse-sensor-proxy-old/d' /root/.ssh/authorized_keys"
```
### Revoking Access When Nodes Leave
@@ -412,7 +412,7 @@ When removing a node from your cluster:
```bash
# On the node being removed, remove the proxy's public key
-ssh root@old-node "sed -i '/pulse-temp-proxy/d' /root/.ssh/authorized_keys"
+ssh root@old-node "sed -i '/pulse-sensor-proxy/d' /root/.ssh/authorized_keys"
# No restart needed - proxy will fail gracefully for that node
# Temperature monitoring will continue for remaining nodes
@@ -422,14 +422,14 @@ ssh root@old-node "sed -i '/pulse-temp-proxy/d' /root/.ssh/authorized_keys"
**Proxy Not Running:**
- Symptom: No temperature data in Pulse UI
-- Check: `systemctl status pulse-temp-proxy` on Proxmox host
-- Fix: `systemctl start pulse-temp-proxy`
+- Check: `systemctl status pulse-sensor-proxy` on Proxmox host
+- Fix: `systemctl start pulse-sensor-proxy`
**Socket Not Accessible in Container:**
- Symptom: Pulse logs show "Temperature proxy not available - using direct SSH"
-- Check: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock` in container
+- Check: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock` in container
- Fix: Verify bind mount in LXC config (`/etc/pve/lxc/<CTID>.conf`)
-- Should have: `lxc.mount.entry: /run/pulse-temp-proxy run/pulse-temp-proxy none bind,create=dir 0 0`
+- Should have: `lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0`
**pvecm Not Available:**
- Symptom: Proxy fails to discover cluster nodes
@@ -455,13 +455,13 @@ The proxy service includes systemd restart-on-failure, which handles most issues
```bash
# Check proxy health
-systemctl is-active pulse-temp-proxy && echo "Proxy is running" || echo "Proxy is down"
+systemctl is-active pulse-sensor-proxy && echo "Proxy is running" || echo "Proxy is down"
# Monitor logs for errors
-journalctl -u pulse-temp-proxy --since "1 hour ago" | grep -i error
+journalctl -u pulse-sensor-proxy --since "1 hour ago" | grep -i error
# Verify socket exists and is accessible
-test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo "Socket missing"
+test -S /run/pulse-sensor-proxy/pulse-sensor-proxy.sock && echo "Socket OK" || echo "Socket missing"
```
**Alerting:**
@@ -474,7 +474,7 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo
### Known Limitations
**One Proxy Per Host:**
-- Each Proxmox host runs one pulse-temp-proxy instance
+- Each Proxmox host runs one pulse-sensor-proxy instance
- If multiple Pulse containers run on same host, they share the same proxy
- All containers see the same temperature data from the same cluster
@@ -496,10 +496,10 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo
### Common Issues
**Temperature Data Stops Appearing:**
-1. Check proxy service: `systemctl status pulse-temp-proxy`
-2. Check proxy logs: `journalctl -u pulse-temp-proxy -n 50`
+1. Check proxy service: `systemctl status pulse-sensor-proxy`
+2. Check proxy logs: `journalctl -u pulse-sensor-proxy -n 50`
3. Test SSH manually: `ssh root@node "sensors -j"`
-4. Verify socket exists: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock`
+4. Verify socket exists: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock`
**New Cluster Node Not Showing Temperatures:**
1. Ensure lm-sensors installed: `ssh root@new-node "sensors -j"`
@@ -507,14 +507,14 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo
3. Force refresh by restarting Pulse: `pct restart <CTID>`
**Permission Denied Errors:**
-1. Verify socket permissions: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock`
+1. Verify socket permissions: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock`
2. Should be: `srw-rw---- 1 root root`
3. Check Pulse runs as root in container: `pct exec <CTID> -- whoami`
**Proxy Service Won't Start:**
-1. Check logs: `journalctl -u pulse-temp-proxy -n 50`
-2. Verify binary exists: `ls -l /usr/local/bin/pulse-temp-proxy`
-3. Test manually: `/usr/local/bin/pulse-temp-proxy --version`
+1. Check logs: `journalctl -u pulse-sensor-proxy -n 50`
+2. Verify binary exists: `ls -l /usr/local/bin/pulse-sensor-proxy`
+3. Test manually: `/usr/local/bin/pulse-sensor-proxy --version`
4. Check socket directory: `ls -ld /var/run`
### Getting Help
@@ -524,9 +524,9 @@ If temperature monitoring isn't working:
1. **Collect diagnostic info:**
```bash
# On Proxmox host
- systemctl status pulse-temp-proxy
- journalctl -u pulse-temp-proxy -n 100 > /tmp/proxy-logs.txt
- ls -la /run/pulse-temp-proxy/pulse-temp-proxy.sock
+ systemctl status pulse-sensor-proxy
+ journalctl -u pulse-sensor-proxy -n 100 > /tmp/proxy-logs.txt
+ ls -la /run/pulse-sensor-proxy/pulse-sensor-proxy.sock
# In Pulse container
journalctl -u pulse -n 100 | grep -i temp > /tmp/pulse-temp-logs.txt
diff --git a/go.mod b/go.mod
index 98d3251d5..9884e7d75 100644
--- a/go.mod
+++ b/go.mod
@@ -8,18 +8,24 @@ require (
github.com/coreos/go-oidc/v3 v3.15.0
github.com/docker/docker v28.5.1+incompatible
github.com/fsnotify/fsnotify v1.9.0
+ github.com/google/uuid v1.6.0
github.com/gorilla/websocket v1.5.3
github.com/joho/godotenv v1.5.1
+ github.com/oklog/ulid/v2 v2.1.1
+ github.com/prometheus/client_golang v1.23.2
github.com/rs/zerolog v1.34.0
github.com/spf13/cobra v1.9.1
golang.org/x/crypto v0.42.0
golang.org/x/oauth2 v0.31.0
golang.org/x/term v0.35.0
+ golang.org/x/time v0.13.0
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/Microsoft/go-winio v0.4.21 // indirect
+ github.com/beorn7/perks v1.0.1 // indirect
+ github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/errdefs v1.0.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/containerd/log v0.1.0 // indirect
@@ -37,10 +43,13 @@ require (
github.com/moby/sys/atomicwriter v0.1.0 // indirect
github.com/moby/term v0.5.2 // indirect
github.com/morikuni/aec v1.0.0 // indirect
- github.com/oklog/ulid/v2 v2.1.1 // indirect
+ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
+ github.com/prometheus/client_model v0.6.2 // indirect
+ github.com/prometheus/common v0.66.1 // indirect
+ github.com/prometheus/procfs v0.16.1 // indirect
github.com/spf13/pflag v1.0.7 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
@@ -48,7 +57,8 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect
go.opentelemetry.io/otel/metric v1.38.0 // indirect
go.opentelemetry.io/otel/trace v1.38.0 // indirect
+ go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/sys v0.36.0 // indirect
- golang.org/x/time v0.13.0 // indirect
+ google.golang.org/protobuf v1.36.8 // indirect
gotest.tools/v3 v3.5.2 // indirect
)
diff --git a/go.sum b/go.sum
index 056def50d..4312ec428 100644
--- a/go.sum
+++ b/go.sum
@@ -2,8 +2,12 @@ github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEK
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Microsoft/go-winio v0.4.21 h1:+6mVbXh4wPzUrl1COX9A+ZCvEpYsOBZ6/+kwDnvLyro=
github.com/Microsoft/go-winio v0.4.21/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
@@ -48,10 +52,14 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
+github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
+github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
@@ -69,6 +77,8 @@ github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s=
github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -80,6 +90,14 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
+github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
+github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
+github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
+github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
+github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
+github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
+github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
@@ -117,6 +135,10 @@ go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJr
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
+go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
diff --git a/internal/api/config_handlers.go b/internal/api/config_handlers.go
index e5f38d794..67a96c03e 100644
--- a/internal/api/config_handlers.go
+++ b/internal/api/config_handlers.go
@@ -3230,7 +3230,7 @@ echo " • No port forwarding, X11, PTY, or agent forwarding allowed"
echo ""
echo "For containerized Pulse (LXC/Docker):"
echo " • SSH keys stored on Proxmox host (not inside container)"
-echo " • pulse-temp-proxy service manages connections securely"
+echo " • pulse-sensor-proxy service manages connections securely"
echo " • Container compromise does not expose SSH credentials"
echo ""
echo "For native Pulse installations:"
diff --git a/internal/tempproxy/client.go b/internal/tempproxy/client.go
index 1ffa08198..75db56449 100644
--- a/internal/tempproxy/client.go
+++ b/internal/tempproxy/client.go
@@ -11,11 +11,11 @@ import (
)
const (
- defaultSocketPath = "/run/pulse-temp-proxy/pulse-temp-proxy.sock"
+ defaultSocketPath = "/run/pulse-sensor-proxy/pulse-sensor-proxy.sock"
defaultTimeout = 10 * time.Second
)
-// Client communicates with pulse-temp-proxy via unix socket
+// Client communicates with pulse-sensor-proxy via unix socket
type Client struct {
socketPath string
timeout time.Duration
@@ -23,7 +23,7 @@ type Client struct {
// NewClient creates a new proxy client
func NewClient() *Client {
- socketPath := os.Getenv("PULSE_TEMP_PROXY_SOCKET")
+ socketPath := os.Getenv("PULSE_SENSOR_PROXY_SOCKET")
if socketPath == "" {
socketPath = defaultSocketPath
}
diff --git a/scripts/build-release.sh b/scripts/build-release.sh
index db4f52b8c..0a87aa912 100755
--- a/scripts/build-release.sh
+++ b/scripts/build-release.sh
@@ -67,8 +67,8 @@ for build_name in "${!builds[@]}"; do
env $build_env go build \
-ldflags="-s -w -X main.Version=v${VERSION} -X main.BuildTime=${build_time} -X main.GitCommit=${git_commit}" \
-trimpath \
- -o "$BUILD_DIR/pulse-temp-proxy-$build_name" \
- ./cmd/pulse-temp-proxy
+ -o "$BUILD_DIR/pulse-sensor-proxy-$build_name" \
+ ./cmd/pulse-sensor-proxy
# Create release archive with proper structure
tar_name="pulse-v${VERSION}-${build_name}.tar.gz"
@@ -82,7 +82,7 @@ for build_name in "${!builds[@]}"; do
# Copy binaries and VERSION file
cp "$BUILD_DIR/pulse-$build_name" "$staging_dir/bin/pulse"
cp "$BUILD_DIR/pulse-docker-agent-$build_name" "$staging_dir/bin/pulse-docker-agent"
- cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$staging_dir/bin/pulse-temp-proxy"
+ cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$staging_dir/bin/pulse-sensor-proxy"
cp "scripts/install-docker-agent.sh" "$staging_dir/scripts/install-docker-agent.sh"
chmod 755 "$staging_dir/scripts/install-docker-agent.sh"
echo "$VERSION" > "$staging_dir/VERSION"
@@ -109,7 +109,7 @@ mkdir -p "$universal_dir/scripts"
for build_name in "${!builds[@]}"; do
cp "$BUILD_DIR/pulse-$build_name" "$universal_dir/bin/pulse-${build_name}"
cp "$BUILD_DIR/pulse-docker-agent-$build_name" "$universal_dir/bin/pulse-docker-agent-${build_name}"
- cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$universal_dir/bin/pulse-temp-proxy-${build_name}"
+ cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$universal_dir/bin/pulse-sensor-proxy-${build_name}"
done
cp "scripts/install-docker-agent.sh" "$universal_dir/scripts/install-docker-agent.sh"
@@ -162,20 +162,20 @@ esac
EOF
chmod +x "$universal_dir/bin/pulse-docker-agent"
-cat > "$universal_dir/bin/pulse-temp-proxy" << 'EOF'
+cat > "$universal_dir/bin/pulse-sensor-proxy" << 'EOF'
#!/bin/sh
-# Auto-detect architecture and run appropriate pulse-temp-proxy binary
+# Auto-detect architecture and run appropriate pulse-sensor-proxy binary
ARCH=$(uname -m)
case "$ARCH" in
x86_64|amd64)
- exec "$(dirname "$0")/pulse-temp-proxy-linux-amd64" "$@"
+ exec "$(dirname "$0")/pulse-sensor-proxy-linux-amd64" "$@"
;;
aarch64|arm64)
- exec "$(dirname "$0")/pulse-temp-proxy-linux-arm64" "$@"
+ exec "$(dirname "$0")/pulse-sensor-proxy-linux-arm64" "$@"
;;
armv7l|armhf)
- exec "$(dirname "$0")/pulse-temp-proxy-linux-armv7" "$@"
+ exec "$(dirname "$0")/pulse-sensor-proxy-linux-armv7" "$@"
;;
*)
echo "Unsupported architecture: $ARCH" >&2
@@ -183,7 +183,7 @@ case "$ARCH" in
;;
esac
EOF
-chmod +x "$universal_dir/bin/pulse-temp-proxy"
+chmod +x "$universal_dir/bin/pulse-sensor-proxy"
# Add VERSION file
echo "$VERSION" > "$universal_dir/VERSION"
@@ -196,16 +196,16 @@ cd ../..
# Cleanup
rm -rf "$universal_dir"
-# Copy standalone pulse-temp-proxy binaries to release directory
+# Copy standalone pulse-sensor-proxy binaries to release directory
# These are needed by install-temp-proxy.sh installer script
-echo "Copying standalone pulse-temp-proxy binaries..."
+echo "Copying standalone pulse-sensor-proxy binaries..."
for build_name in "${!builds[@]}"; do
- cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$RELEASE_DIR/"
+ cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$RELEASE_DIR/"
done
# Generate checksums (include tarballs and standalone binaries)
cd $RELEASE_DIR
-sha256sum *.tar.gz pulse-temp-proxy-* > checksums.txt
+sha256sum *.tar.gz pulse-sensor-proxy-* > checksums.txt
cd ..
echo
diff --git a/scripts/install-temp-proxy.sh b/scripts/install-temp-proxy.sh
index 017c9dfdb..3ace69f17 100755
--- a/scripts/install-temp-proxy.sh
+++ b/scripts/install-temp-proxy.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# install-temp-proxy.sh - Installs pulse-temp-proxy on Proxmox host for secure temperature monitoring
+# install-temp-proxy.sh - Installs pulse-sensor-proxy on Proxmox host for secure temperature monitoring
# This script is idempotent and can be safely re-run
set -euo pipefail
@@ -67,13 +67,22 @@ if ! pct status "$CTID" >/dev/null 2>&1; then
exit 1
fi
-print_info "Installing pulse-temp-proxy for container $CTID"
+print_info "Installing pulse-sensor-proxy for container $CTID"
-BINARY_PATH="/usr/local/bin/pulse-temp-proxy"
-SERVICE_PATH="/etc/systemd/system/pulse-temp-proxy.service"
-RUNTIME_DIR="/run/pulse-temp-proxy"
-SOCKET_PATH="/run/pulse-temp-proxy/pulse-temp-proxy.sock"
-SSH_DIR="/var/lib/pulse-temp-proxy/ssh"
+BINARY_PATH="/usr/local/bin/pulse-sensor-proxy"
+SERVICE_PATH="/etc/systemd/system/pulse-sensor-proxy.service"
+RUNTIME_DIR="/run/pulse-sensor-proxy"
+SOCKET_PATH="/run/pulse-sensor-proxy/pulse-sensor-proxy.sock"
+SSH_DIR="/var/lib/pulse-sensor-proxy/ssh"
+
+# Create dedicated service account if it doesn't exist
+if ! id -u pulse-sensor-proxy >/dev/null 2>&1; then
+ print_info "Creating pulse-sensor-proxy service account..."
+ useradd --system --user-group --no-create-home --shell /usr/sbin/nologin pulse-sensor-proxy
+ print_info "Service account created"
+else
+ print_info "Service account pulse-sensor-proxy already exists"
+fi
# Install binary - either from local file or download from GitHub
if [[ -n "$LOCAL_BINARY" ]]; then
@@ -105,13 +114,13 @@ else
ARCH=$(uname -m)
case $ARCH in
x86_64)
- BINARY_NAME="pulse-temp-proxy-linux-amd64"
+ BINARY_NAME="pulse-sensor-proxy-linux-amd64"
;;
aarch64|arm64)
- BINARY_NAME="pulse-temp-proxy-linux-arm64"
+ BINARY_NAME="pulse-sensor-proxy-linux-arm64"
;;
armv7l|armhf)
- BINARY_NAME="pulse-temp-proxy-linux-armv7"
+ BINARY_NAME="pulse-sensor-proxy-linux-armv7"
;;
*)
print_error "Unsupported architecture: $ARCH"
@@ -134,12 +143,19 @@ else
print_info "Binary installed to $BINARY_PATH"
fi
-# Create SSH key directory
-mkdir -p "$SSH_DIR"
-chmod 700 "$SSH_DIR"
+# Create directories with proper ownership (handles fresh installs and upgrades)
+print_info "Setting up directories with proper ownership..."
+install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0750 /var/lib/pulse-sensor-proxy
+install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0700 "$SSH_DIR"
-# Install systemd service
-print_info "Installing systemd service..."
+# Stop existing service if running (for upgrades)
+if systemctl is-active --quiet pulse-sensor-proxy 2>/dev/null; then
+ print_info "Stopping existing service for upgrade..."
+ systemctl stop pulse-sensor-proxy
+fi
+
+# Install hardened systemd service
+print_info "Installing hardened systemd service..."
cat > "$SERVICE_PATH" << 'EOF'
[Unit]
Description=Pulse Temperature Proxy
@@ -148,26 +164,47 @@ After=network.target
[Service]
Type=simple
-User=root
-ExecStart=/usr/local/bin/pulse-temp-proxy
+User=pulse-sensor-proxy
+Group=pulse-sensor-proxy
+WorkingDirectory=/var/lib/pulse-sensor-proxy
+ExecStart=/usr/local/bin/pulse-sensor-proxy
Restart=on-failure
RestartSec=5s
-# Runtime directory for socket
-RuntimeDirectory=pulse-temp-proxy
+# Runtime dirs/sockets
+RuntimeDirectory=pulse-sensor-proxy
RuntimeDirectoryMode=0775
+UMask=0007
-# Security hardening
+# Core hardening
NoNewPrivileges=true
-PrivateTmp=true
ProtectSystem=strict
-ProtectHome=true
-ReadWritePaths=/var/lib/pulse-temp-proxy
+ProtectHome=read-only
+ReadWritePaths=/var/lib/pulse-sensor-proxy
+ProtectKernelTunables=true
+ProtectKernelModules=true
+ProtectControlGroups=true
+ProtectClock=true
+PrivateTmp=true
+PrivateDevices=true
+ProtectProc=invisible
+ProcSubset=pid
+LockPersonality=true
+RemoveIPC=true
+RestrictSUIDSGID=true
+RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
+RestrictNamespaces=true
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM
+CapabilityBoundingSet=
+AmbientCapabilities=
+KeyringMode=private
+LimitNOFILE=1024
# Logging
StandardOutput=journal
StandardError=journal
-SyslogIdentifier=pulse-temp-proxy
+SyslogIdentifier=pulse-sensor-proxy
[Install]
WantedBy=multi-user.target
@@ -176,8 +213,8 @@ EOF
# Reload systemd and start service
print_info "Enabling and starting service..."
systemctl daemon-reload
-systemctl enable pulse-temp-proxy.service
-systemctl restart pulse-temp-proxy.service
+systemctl enable pulse-sensor-proxy.service
+systemctl restart pulse-sensor-proxy.service
# Wait for socket to appear
print_info "Waiting for socket..."
@@ -190,7 +227,7 @@ done
if [[ ! -S "$SOCKET_PATH" ]]; then
print_error "Socket did not appear after 10 seconds"
- print_info "Check service status: systemctl status pulse-temp-proxy"
+ print_info "Check service status: systemctl status pulse-sensor-proxy"
exit 1
fi
@@ -198,15 +235,15 @@ print_info "Socket ready at $SOCKET_PATH"
# Configure LXC bind mount - mount entire directory for socket stability
LXC_CONFIG="/etc/pve/lxc/${CTID}.conf"
-BIND_ENTRY="lxc.mount.entry: /run/pulse-temp-proxy run/pulse-temp-proxy none bind,create=dir 0 0"
+BIND_ENTRY="lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0"
# Check if bind mount already exists
-if grep -q "pulse-temp-proxy" "$LXC_CONFIG"; then
+if grep -q "pulse-sensor-proxy" "$LXC_CONFIG"; then
print_info "Bind mount already configured in LXC config"
# Remove old socket-level bind if it exists
- if grep -q "pulse-temp-proxy.sock" "$LXC_CONFIG"; then
+ if grep -q "pulse-sensor-proxy.sock" "$LXC_CONFIG"; then
print_info "Upgrading from socket-level to directory-level bind mount..."
- sed -i '/pulse-temp-proxy\.sock/d' "$LXC_CONFIG"
+ sed -i '/pulse-sensor-proxy\.sock/d' "$LXC_CONFIG"
echo "$BIND_ENTRY" >> "$LXC_CONFIG"
NEEDS_RESTART=true
fi
@@ -227,7 +264,7 @@ fi
# Verify socket is accessible in container
print_info "Verifying socket accessibility..."
-if pct exec "$CTID" -- test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock; then
+if pct exec "$CTID" -- test -S /run/pulse-sensor-proxy/pulse-sensor-proxy.sock; then
print_info "Socket is accessible in container"
else
print_warn "Socket is not yet accessible in container"
@@ -236,11 +273,11 @@ fi
# Test proxy status
print_info "Testing proxy status..."
-if systemctl is-active --quiet pulse-temp-proxy; then
- print_info "${GREEN}✓${NC} pulse-temp-proxy is running"
+if systemctl is-active --quiet pulse-sensor-proxy; then
+ print_info "${GREEN}✓${NC} pulse-sensor-proxy is running"
else
- print_error "pulse-temp-proxy is not running"
- print_info "Check logs: journalctl -u pulse-temp-proxy -n 50"
+ print_error "pulse-sensor-proxy is not running"
+ print_info "Check logs: journalctl -u pulse-sensor-proxy -n 50"
exit 1
fi
@@ -255,7 +292,7 @@ print_info " 2. Go to Settings → Enable Temperature Monitoring"
print_info " 3. The proxy will automatically discover and configure cluster nodes"
print_info ""
print_info "To check proxy status:"
-print_info " systemctl status pulse-temp-proxy"
-print_info " journalctl -u pulse-temp-proxy -f"
+print_info " systemctl status pulse-sensor-proxy"
+print_info " journalctl -u pulse-sensor-proxy -f"
exit 0
diff --git a/scripts/pulse-proxy-rotate-keys.sh b/scripts/pulse-proxy-rotate-keys.sh
new file mode 100755
index 000000000..c448cf336
--- /dev/null
+++ b/scripts/pulse-proxy-rotate-keys.sh
@@ -0,0 +1,314 @@
+#!/usr/bin/env bash
+# pulse-proxy-rotate-keys.sh
+# Rotate pulse-sensor-proxy SSH keys with staging, verification, and rollback support.
+
+set -euo pipefail
+
+BASE_DIR="/var/lib/pulse-sensor-proxy"
+ACTIVE_DIR="${BASE_DIR}/ssh"
+POOL_DIR="${BASE_DIR}/ssh.d"
+STAGING_DIR="${POOL_DIR}/next"
+BACKUP_DIR="${POOL_DIR}/prev"
+SOCKET_PATH="/run/pulse-sensor-proxy/pulse-sensor-proxy.sock"
+SCRIPT_TAG="pulse-proxy-rotate"
+SSH_KEY_TYPE="ed25519"
+SSH_KEY_COMMENT="pulse-sensor-proxy"
+SSH_KEY_FILE="id_${SSH_KEY_TYPE}"
+
+dry_run=false
+do_rollback=false
+
+usage() {
+ cat <<'EOF'
+Usage: pulse-proxy-rotate-keys.sh [--dry-run] [--rollback]
+
+Options:
+ --dry-run Walk through all steps without modifying state or contacting nodes.
+ --rollback Restore the previously active keypair (requires ssh.d/prev).
+ -h, --help Show this help.
+
+Examples:
+ ./pulse-proxy-rotate-keys.sh --dry-run
+ ./pulse-proxy-rotate-keys.sh
+ ./pulse-proxy-rotate-keys.sh --rollback
+EOF
+}
+
+log_info() { logger -t "${SCRIPT_TAG}" "INFO: $*"; printf '[INFO] %s\n' "$*"; }
+log_warn() { logger -t "${SCRIPT_TAG}" "WARN: $*"; printf '[WARN] %s\n' "$*"; }
+log_error() { logger -t "${SCRIPT_TAG}" "ERROR: $*"; printf '[ERROR] %s\n' "$*" >&2; }
+
+require_root() {
+ if (( EUID != 0 )); then
+ log_error "This script must be run as root."
+ exit 1
+ fi
+}
+
+require_cmds() {
+ local missing=()
+ for cmd in ssh-keygen ssh jq python3 mkdir; do
+ if ! command -v "$cmd" >/dev/null 2>&1; then
+ missing+=("$cmd")
+ fi
+ done
+ if ((${#missing[@]} > 0)); then
+ log_error "Missing required commands: ${missing[*]}"
+ exit 1
+ fi
+}
+
+parse_args() {
+ while (($#)); do
+ case "$1" in
+ --dry-run) dry_run=true ;;
+ --rollback) do_rollback=true ;;
+ -h|--help) usage; exit 0 ;;
+ *) log_error "Unknown option: $1"; usage; exit 1 ;;
+ esac
+ shift
+ done
+ if $dry_run && $do_rollback; then
+ log_error "Cannot combine --dry-run and --rollback."
+ exit 1
+ fi
+}
+
+ensure_socket() {
+ if [[ ! -S "$SOCKET_PATH" ]]; then
+ log_error "Proxy socket not found at $SOCKET_PATH. Is pulse-sensor-proxy running?"
+ exit 1
+ fi
+}
+
+run_cmd() {
+ if $dry_run; then
+ log_info "[dry-run] $*"
+ else
+ "$@"
+ fi
+}
+
+json_rpc() {
+ local method=$1
+ local params_json=${2:-"{}"}
+ local response
+ if $dry_run; then
+ log_info "[dry-run] would call RPC ${method} with params ${params_json}" >&2
+ printf '{"success":true,"data":{}}'
+ return 0
+ fi
+
+ response=$(SOCKET="$SOCKET_PATH" METHOD="$method" PARAMS="$params_json" python3 - <<'PY'
+import json
+import os
+import socket
+import sys
+import uuid
+
+sock_path = os.environ["SOCKET"]
+method = os.environ["METHOD"]
+params = json.loads(os.environ["PARAMS"]) if os.environ["PARAMS"] else {}
+payload = {
+ "correlation_id": str(uuid.uuid4()),
+ "method": method,
+ "params": params,
+}
+
+data = (json.dumps(payload) + "\n").encode()
+with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
+ sock.connect(sock_path)
+ sock.sendall(data)
+ sock.shutdown(socket.SHUT_WR)
+ chunks = []
+ while True:
+ chunk = sock.recv(65536)
+ if not chunk:
+ break
+ chunks.append(chunk)
+ sys.stdout.write(b"".join(chunks).decode())
+PY
+) || {
+ log_error "RPC '${method}' failed to execute."
+ exit 1
+ }
+ echo "$response"
+}
+
+require_success() {
+ local resp=$1
+ local method=$2
+ local ok
+ ok=$(echo "$resp" | jq -r '.success // false')
+ if [[ "$ok" != "true" ]]; then
+ local err
+ err=$(echo "$resp" | jq -r '.error // empty')
+ log_error "RPC '${method}' failed: ${err:-unknown error}"
+ exit 1
+ fi
+}
+
+prepare_dirs() {
+ for dir in "$BASE_DIR" "$POOL_DIR" "$STAGING_DIR"; do
+ if $dry_run; then
+ log_info "[dry-run] ensure directory $dir owned by pulse-sensor-proxy:pulse-sensor-proxy"
+ continue
+ fi
+ mkdir -p "$dir"
+ chown pulse-sensor-proxy:pulse-sensor-proxy "$dir"
+ chmod 0750 "$dir"
+ done
+}
+
+clean_staging() {
+ if [[ -d "$STAGING_DIR" ]]; then
+ if $dry_run; then
+ log_info "[dry-run] would remove existing staging directory $STAGING_DIR"
+ else
+ rm -rf "$STAGING_DIR"
+ mkdir -p "$STAGING_DIR"
+ chown pulse-sensor-proxy:pulse-sensor-proxy "$STAGING_DIR"
+ chmod 0750 "$STAGING_DIR"
+ fi
+ fi
+}
+
+generate_keypair() {
+ local key_path="$STAGING_DIR/${SSH_KEY_FILE}"
+ if $dry_run; then
+ log_info "[dry-run] would generate new ${SSH_KEY_TYPE} keypair at $key_path"
+ return
+ fi
+ clean_staging
+ log_info "Generating new ${SSH_KEY_TYPE} keypair in staging..."
+ ssh-keygen -t "$SSH_KEY_TYPE" -N '' -C "$SSH_KEY_COMMENT rotation $(date -u +%Y%m%dT%H%M%SZ)" -f "$key_path" >/dev/null
+ chown pulse-sensor-proxy:pulse-sensor-proxy "$key_path" "${key_path}.pub"
+ chmod 0600 "$key_path"
+ chmod 0640 "${key_path}.pub"
+}
+
+ensure_cluster_keys() {
+ local key_dir=$1
+ local payload
+ payload=$(jq -cn --arg dir "$key_dir" '{key_dir: $dir}')
+ local resp
+ resp=$(json_rpc "ensure_cluster_keys" "$payload")
+ require_success "$resp" "ensure_cluster_keys"
+ log_info "Proxy reported successful key distribution."
+}
+
+list_nodes() {
+ local resp
+ resp=$(json_rpc "register_nodes")
+ require_success "$resp" "register_nodes"
+ echo "$resp" | jq -r '.data.nodes[]?.name // empty' | sort -u
+}
+
+verify_nodes() {
+ local key_file="$1"
+ local -a bad_nodes=()
+ local rc
+ while read -r node; do
+ [[ -z "$node" ]] && continue
+ log_info "Verifying SSH access on ${node}..."
+ if $dry_run; then
+ log_info "[dry-run] would run ssh -i $key_file root@${node} sensors -j"
+ continue
+ fi
+ if ssh -i "$key_file" -o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@${node}" "sensors -j" >/dev/null 2>&1; then
+ log_info "Verification succeeded for ${node}."
+ else
+ log_warn "Verification failed for ${node}."
+ bad_nodes+=("$node")
+ fi
+ done < <(list_nodes)
+
+ if ((${#bad_nodes[@]} > 0)); then
+ log_error "Verification failed for: ${bad_nodes[*]}"
+ exit 1
+ fi
+}
+
+swap_keys() {
+ local timestamp
+ timestamp=$(date -u +%Y%m%dT%H%M%SZ)
+
+ if $dry_run; then
+ log_info "[dry-run] would rotate directories:"
+ log_info "[dry-run] mv ${BACKUP_DIR} ${POOL_DIR}/prev.${timestamp} (if exists)"
+ log_info "[dry-run] mv ${ACTIVE_DIR} ${BACKUP_DIR}"
+ log_info "[dry-run] mv ${STAGING_DIR} ${ACTIVE_DIR}"
+ return
+ fi
+
+ log_info "Activating new keypair..."
+ if [[ -d "$BACKUP_DIR" ]]; then
+ mv "$BACKUP_DIR" "${POOL_DIR}/prev.${timestamp}"
+ fi
+ mv "$ACTIVE_DIR" "$BACKUP_DIR"
+ mv "$STAGING_DIR" "$ACTIVE_DIR"
+ chown -R pulse-sensor-proxy:pulse-sensor-proxy "$ACTIVE_DIR" "$BACKUP_DIR"
+ chmod 0750 "$ACTIVE_DIR" "$BACKUP_DIR"
+ chmod 0600 "$ACTIVE_DIR/${SSH_KEY_FILE}"
+ chmod 0640 "$ACTIVE_DIR/${SSH_KEY_FILE}.pub"
+ log_info "Key rotation complete. Previous keys stored at ${BACKUP_DIR}."
+}
+
+rollback_keys() {
+ if [[ ! -d "$BACKUP_DIR" ]]; then
+ log_error "No backup directory (${BACKUP_DIR}) present. Cannot rollback."
+ exit 1
+ fi
+ local timestamp
+ timestamp=$(date -u +%Y%m%dT%H%M%SZ)
+
+ if $dry_run; then
+ log_info "[dry-run] would rollback by swapping ${ACTIVE_DIR} with ${BACKUP_DIR}"
+ return
+ fi
+
+ log_warn "Rolling back to previous keypair..."
+ local failed_dir="${POOL_DIR}/failed.${timestamp}"
+ if [[ -d "$ACTIVE_DIR" ]]; then
+ mv "$ACTIVE_DIR" "$failed_dir"
+ fi
+ mv "$BACKUP_DIR" "$ACTIVE_DIR"
+ chown -R pulse-sensor-proxy:pulse-sensor-proxy "$ACTIVE_DIR"
+ chmod 0600 "$ACTIVE_DIR/${SSH_KEY_FILE}"
+ chmod 0640 "$ACTIVE_DIR/${SSH_KEY_FILE}.pub"
+ log_info "Rollback complete. Old keys preserved at ${failed_dir}."
+
+ log_info "Re-pushing restored keypair to cluster nodes..."
+ ensure_cluster_keys "$ACTIVE_DIR"
+}
+
+main() {
+ parse_args "$@"
+ require_root
+ require_cmds
+
+ if $do_rollback; then
+ ensure_socket
+ rollback_keys
+ return
+ fi
+
+ prepare_dirs
+ ensure_socket
+
+ generate_keypair
+
+ local staging_key="${STAGING_DIR}/${SSH_KEY_FILE}"
+ if [[ ! -f "${staging_key}" && $dry_run == false ]]; then
+ log_error "Staged private key missing at ${staging_key}"
+ exit 1
+ fi
+
+ ensure_cluster_keys "$STAGING_DIR"
+ verify_nodes "$staging_key"
+ swap_keys
+
+ log_info "Rotation workflow finished successfully."
+}
+
+main "$@"
diff --git a/scripts/pulse-sensor-proxy.service b/scripts/pulse-sensor-proxy.service
new file mode 100644
index 000000000..7452614f1
--- /dev/null
+++ b/scripts/pulse-sensor-proxy.service
@@ -0,0 +1,51 @@
+[Unit]
+Description=Pulse Sensor Proxy
+Documentation=https://github.com/rcourtman/Pulse
+After=network.target
+
+[Service]
+Type=simple
+User=pulse-sensor-proxy
+Group=pulse-sensor-proxy
+WorkingDirectory=/var/lib/pulse-sensor-proxy
+ExecStart=/usr/local/bin/pulse-sensor-proxy
+Restart=on-failure
+RestartSec=5s
+
+# Runtime dirs/sockets
+RuntimeDirectory=pulse-sensor-proxy
+RuntimeDirectoryMode=0775
+UMask=0007
+
+# Core hardening
+NoNewPrivileges=true
+ProtectSystem=strict
+ProtectHome=read-only
+ReadWritePaths=/var/lib/pulse-sensor-proxy
+ProtectKernelTunables=true
+ProtectKernelModules=true
+ProtectControlGroups=true
+ProtectClock=true
+PrivateTmp=true
+PrivateDevices=true
+ProtectProc=invisible
+ProcSubset=pid
+LockPersonality=true
+RemoveIPC=true
+RestrictSUIDSGID=true
+RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
+RestrictNamespaces=true
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM
+CapabilityBoundingSet=
+AmbientCapabilities=
+KeyringMode=private
+LimitNOFILE=1024
+
+# Logging
+StandardOutput=journal
+StandardError=journal
+SyslogIdentifier=pulse-sensor-proxy
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/pulse-temp-proxy.service b/scripts/pulse-temp-proxy.service
deleted file mode 100644
index 9ec26443e..000000000
--- a/scripts/pulse-temp-proxy.service
+++ /dev/null
@@ -1,26 +0,0 @@
-[Unit]
-Description=Pulse Temperature Proxy
-Documentation=https://github.com/rcourtman/Pulse
-After=network.target
-
-[Service]
-Type=simple
-User=root
-ExecStart=/usr/local/bin/pulse-temp-proxy
-Restart=on-failure
-RestartSec=5s
-
-# Security hardening
-NoNewPrivileges=true
-PrivateTmp=true
-ProtectSystem=strict
-ProtectHome=true
-ReadWritePaths=/var/lib/pulse-temp-proxy /var/run
-
-# Logging
-StandardOutput=journal
-StandardError=journal
-SyslogIdentifier=pulse-temp-proxy
-
-[Install]
-WantedBy=multi-user.target