diff --git a/.gitignore b/.gitignore index c49e4ab0b..b51fb3eb9 100644 --- a/.gitignore +++ b/.gitignore @@ -145,4 +145,4 @@ cloud-relay/ scripts/agent/ docs/internal/ claude.md -pulse-temp-proxy +/pulse-sensor-proxy diff --git a/cmd/pulse-temp-proxy/auth.go b/cmd/pulse-sensor-proxy/auth.go similarity index 51% rename from cmd/pulse-temp-proxy/auth.go rename to cmd/pulse-sensor-proxy/auth.go index 0cc4c914f..64ccedadd 100644 --- a/cmd/pulse-temp-proxy/auth.go +++ b/cmd/pulse-sensor-proxy/auth.go @@ -8,18 +8,25 @@ import ( "github.com/rs/zerolog/log" ) -// verifyPeerCredentials checks if the connecting process is authorized -// Returns nil if authorized, error otherwise -func verifyPeerCredentials(conn net.Conn) error { +// peerCredentials holds extracted credentials from SO_PEERCRED +type peerCredentials struct { + uid uint32 + pid uint32 + gid uint32 +} + +// extractPeerCredentials extracts and verifies peer credentials +// Returns credentials if authorized, error otherwise +func extractPeerCredentials(conn net.Conn) (*peerCredentials, error) { // Get the underlying file descriptor unixConn, ok := conn.(*net.UnixConn) if !ok { - return fmt.Errorf("not a unix connection") + return nil, fmt.Errorf("not a unix connection") } file, err := unixConn.File() if err != nil { - return fmt.Errorf("failed to get file descriptor: %w", err) + return nil, fmt.Errorf("failed to get file descriptor: %w", err) } defer file.Close() @@ -28,7 +35,7 @@ func verifyPeerCredentials(conn net.Conn) error { // Get peer credentials using SO_PEERCRED cred, err := syscall.GetsockoptUcred(fd, syscall.SOL_SOCKET, syscall.SO_PEERCRED) if err != nil { - return fmt.Errorf("failed to get peer credentials: %w", err) + return nil, fmt.Errorf("failed to get peer credentials: %w", err) } log.Debug(). 
@@ -39,14 +46,29 @@ func verifyPeerCredentials(conn net.Conn) error { // Allow root (UID 0) - this covers most service scenarios if cred.Uid == 0 { - return nil + return &peerCredentials{ + uid: cred.Uid, + pid: uint32(cred.Pid), + gid: cred.Gid, + }, nil } // Allow the proxy's own user (for testing/debugging) if cred.Uid == uint32(syscall.Getuid()) { - return nil + return &peerCredentials{ + uid: cred.Uid, + pid: uint32(cred.Pid), + gid: cred.Gid, + }, nil } // Reject all other users - return fmt.Errorf("unauthorized: uid=%d gid=%d", cred.Uid, cred.Gid) + return nil, fmt.Errorf("unauthorized: uid=%d gid=%d", cred.Uid, cred.Gid) +} + +// verifyPeerCredentials checks if the connecting process is authorized (legacy function) +// Returns nil if authorized, error otherwise +func verifyPeerCredentials(conn net.Conn) error { + _, err := extractPeerCredentials(conn) + return err } diff --git a/cmd/pulse-sensor-proxy/config.go b/cmd/pulse-sensor-proxy/config.go new file mode 100644 index 000000000..ddb678a49 --- /dev/null +++ b/cmd/pulse-sensor-proxy/config.go @@ -0,0 +1,172 @@ +package main + +import ( + "fmt" + "net" + "os" + "strings" + + "github.com/rs/zerolog/log" + "gopkg.in/yaml.v3" +) + +// Config holds proxy configuration +type Config struct { + AllowedSourceSubnets []string `yaml:"allowed_source_subnets"` + MetricsAddress string `yaml:"metrics_address"` +} + +// loadConfig loads configuration from file and environment variables +func loadConfig(configPath string) (*Config, error) { + cfg := &Config{} + + // Try to load config file if it exists + if configPath != "" { + if _, err := os.Stat(configPath); err == nil { + data, err := os.ReadFile(configPath) + if err != nil { + return nil, fmt.Errorf("failed to read config file: %w", err) + } + + if err := yaml.Unmarshal(data, cfg); err != nil { + return nil, fmt.Errorf("failed to parse config file: %w", err) + } + + log.Info(). + Str("config_file", configPath). + Int("subnet_count", len(cfg.AllowedSourceSubnets)). 
+ Msg("Loaded configuration from file") + } + } + + // Append from environment variable if set + if envSubnets := os.Getenv("PULSE_SENSOR_PROXY_ALLOWED_SUBNETS"); envSubnets != "" { + envList := strings.Split(envSubnets, ",") + cfg.AllowedSourceSubnets = append(cfg.AllowedSourceSubnets, envList...) + log.Info(). + Int("env_subnet_count", len(envList)). + Msg("Appended subnets from environment variable") + } + + // Metrics address from environment variable + if envMetrics := os.Getenv("PULSE_SENSOR_PROXY_METRICS_ADDR"); envMetrics != "" { + cfg.MetricsAddress = envMetrics + log.Info().Str("metrics_addr", envMetrics).Msg("Metrics address set from environment") + } + + // Default metrics address if not configured + if cfg.MetricsAddress == "" { + cfg.MetricsAddress = "default" // Will use defaultMetricsAddr + } + + // Parse and validate all subnets + if len(cfg.AllowedSourceSubnets) > 0 { + normalized, err := parseAllowedSubnets(cfg.AllowedSourceSubnets) + if err != nil { + return nil, fmt.Errorf("invalid subnet configuration: %w", err) + } + cfg.AllowedSourceSubnets = normalized + log.Info(). + Strs("allowed_subnets", cfg.AllowedSourceSubnets). + Msg("Validated and normalized subnet configuration") + } else { + // Auto-detect if no configuration provided + detected := detectHostCIDRs() + if len(detected) == 0 { + log.Warn().Msg("No allowed_source_subnets configured and no host addresses detected") + } else { + cfg.AllowedSourceSubnets = detected + log.Warn(). + Strs("auto_detected_subnets", detected). 
+ Msg("No allowed_source_subnets configured; using detected host addresses (recommended to configure explicitly)") + } + } + + return cfg, nil +} + +// detectHostCIDRs detects local host IP addresses as /32 (IPv4) or /128 (IPv6) CIDRs +func detectHostCIDRs() []string { + var cidrs []string + + ifaces, err := net.Interfaces() + if err != nil { + log.Warn().Err(err).Msg("Failed to enumerate network interfaces") + return cidrs + } + + for _, iface := range ifaces { + // Skip down or loopback interfaces + if iface.Flags&net.FlagUp == 0 || iface.Flags&net.FlagLoopback != 0 { + continue + } + + addrs, err := iface.Addrs() + if err != nil { + log.Warn().Str("iface", iface.Name).Err(err).Msg("Address lookup failed") + continue + } + + for _, addr := range addrs { + ipNet, ok := addr.(*net.IPNet) + if !ok { + continue + } + + ip := ipNet.IP + // Skip loopback and link-local addresses + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + continue + } + + // Add as /32 for IPv4, /128 for IPv6 + if ip.To4() != nil { + cidrs = append(cidrs, ip.String()+"/32") + } else if ip.To16() != nil { + cidrs = append(cidrs, ip.String()+"/128") + } + } + } + + return cidrs +} + +// parseAllowedSubnets validates and normalizes subnet specifications +func parseAllowedSubnets(cfg []string) ([]string, error) { + seen := make(map[string]struct{}) + var normalized []string + + for _, raw := range cfg { + entry := strings.TrimSpace(raw) + if entry == "" { + continue + } + + // Try parsing as CIDR + if _, _, err := net.ParseCIDR(entry); err == nil { + if _, exists := seen[entry]; !exists { + seen[entry] = struct{}{} + normalized = append(normalized, entry) + } + continue + } + + // Try parsing as single IP + if ip := net.ParseIP(entry); ip != nil { + norm := entry + "/32" + if ip.To4() == nil { + norm = entry + "/128" + } + if _, exists := seen[norm]; !exists { + seen[norm] = struct{}{} + normalized = append(normalized, norm) + } + continue + } + + // Invalid format + 
return nil, fmt.Errorf("invalid subnet or address: %s", entry) + } + + return normalized, nil +} diff --git a/cmd/pulse-sensor-proxy/main.go b/cmd/pulse-sensor-proxy/main.go new file mode 100644 index 000000000..cbcc0c726 --- /dev/null +++ b/cmd/pulse-sensor-proxy/main.go @@ -0,0 +1,732 @@ +package main + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "os" + "os/signal" + "path/filepath" + "strings" + "syscall" + "time" + + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" + "github.com/spf13/cobra" +) + +// Version information (set at build time with -ldflags) +var ( + Version = "dev" + BuildTime = "unknown" + GitCommit = "unknown" +) + +const ( + defaultSocketPath = "/run/pulse-sensor-proxy/pulse-sensor-proxy.sock" + defaultSSHKeyPath = "/var/lib/pulse-sensor-proxy/ssh" + defaultConfigPath = "/etc/pulse-sensor-proxy/config.yaml" + maxRequestBytes = 16 * 1024 // 16 KiB max request size +) + +var rootCmd = &cobra.Command{ + Use: "pulse-sensor-proxy", + Short: "Pulse Sensor Proxy - Secure sensor data bridge for containerized Pulse", + Long: `Sensor monitoring proxy that keeps SSH keys on the host and exposes sensor data via unix socket`, + Version: Version, + Run: func(cmd *cobra.Command, args []string) { + runProxy() + }, +} + +var versionCmd = &cobra.Command{ + Use: "version", + Short: "Print version information", + Run: func(cmd *cobra.Command, args []string) { + fmt.Printf("pulse-sensor-proxy %s\n", Version) + if BuildTime != "unknown" { + fmt.Printf("Built: %s\n", BuildTime) + } + if GitCommit != "unknown" { + fmt.Printf("Commit: %s\n", GitCommit) + } + }, +} + +func init() { + rootCmd.AddCommand(versionCmd) +} + +func main() { + if err := rootCmd.Execute(); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +// Proxy manages the temperature monitoring proxy +type Proxy struct { + socketPath string + sshKeyPath string + listener net.Listener + rateLimiter *rateLimiter + nodeGate *nodeGate + router 
map[string]handlerFunc + config *Config + metrics *ProxyMetrics +} + +// RPC request types +const ( + RPCEnsureClusterKeys = "ensure_cluster_keys" + RPCRegisterNodes = "register_nodes" + RPCGetTemperature = "get_temperature" + RPCGetStatus = "get_status" +) + +// RPCRequest represents a request from Pulse +type RPCRequest struct { + CorrelationID string `json:"correlation_id,omitempty"` + Method string `json:"method"` + Params map[string]interface{} `json:"params"` +} + +// RPCResponse represents a response to Pulse +type RPCResponse struct { + CorrelationID string `json:"correlation_id,omitempty"` + Success bool `json:"success"` + Data interface{} `json:"data,omitempty"` + Error string `json:"error,omitempty"` +} + +// handlerFunc is the signature for RPC method handlers +type handlerFunc func(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) + +func runProxy() { + // Initialize logger + zerolog.TimeFieldFormat = zerolog.TimeFormatUnix + log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) + + socketPath := os.Getenv("PULSE_SENSOR_PROXY_SOCKET") + if socketPath == "" { + socketPath = defaultSocketPath + } + + sshKeyPath := os.Getenv("PULSE_SENSOR_PROXY_SSH_DIR") + if sshKeyPath == "" { + sshKeyPath = defaultSSHKeyPath + } + + // Load configuration + configPath := os.Getenv("PULSE_SENSOR_PROXY_CONFIG") + if configPath == "" { + configPath = defaultConfigPath + } + + cfg, err := loadConfig(configPath) + if err != nil { + log.Fatal().Err(err).Msg("Failed to load configuration") + } + + // Initialize metrics + metrics := NewProxyMetrics(Version) + + log.Info(). + Str("socket", socketPath). + Str("ssh_key_dir", sshKeyPath). + Str("config_path", configPath). + Str("version", Version). 
+ Msg("Starting pulse-sensor-proxy") + + proxy := &Proxy{ + socketPath: socketPath, + sshKeyPath: sshKeyPath, + rateLimiter: newRateLimiter(), + nodeGate: newNodeGate(), + config: cfg, + metrics: metrics, + } + + // Register RPC method handlers + proxy.router = map[string]handlerFunc{ + RPCGetStatus: proxy.handleGetStatusV2, + RPCEnsureClusterKeys: proxy.handleEnsureClusterKeysV2, + RPCRegisterNodes: proxy.handleRegisterNodesV2, + RPCGetTemperature: proxy.handleGetTemperatureV2, + } + + if err := proxy.Start(); err != nil { + log.Fatal().Err(err).Msg("Failed to start proxy") + } + + // Start metrics server + if err := metrics.Start(cfg.MetricsAddress); err != nil { + log.Fatal().Err(err).Msg("Failed to start metrics server") + } + + // Setup signal handlers + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) + + <-sigChan + log.Info().Msg("Shutting down proxy...") + proxy.Stop() + proxy.rateLimiter.shutdown() + metrics.Shutdown(context.Background()) + log.Info().Msg("Proxy stopped") +} + +// Start initializes and starts the proxy +func (p *Proxy) Start() error { + // Create SSH key directory if it doesn't exist + if err := os.MkdirAll(p.sshKeyPath, 0700); err != nil { + return fmt.Errorf("failed to create SSH key directory: %w", err) + } + + // Ensure SSH keypair exists + if err := p.ensureSSHKeypair(); err != nil { + return fmt.Errorf("failed to ensure SSH keypair: %w", err) + } + + // Remove existing socket if it exists + if err := os.RemoveAll(p.socketPath); err != nil { + return fmt.Errorf("failed to remove existing socket: %w", err) + } + + // Create socket directory if needed + socketDir := filepath.Dir(p.socketPath) + if err := os.MkdirAll(socketDir, 0755); err != nil { + return fmt.Errorf("failed to create socket directory: %w", err) + } + + // Create unix socket listener + listener, err := net.Listen("unix", p.socketPath) + if err != nil { + return fmt.Errorf("failed to create unix socket: %w", err) + } + p.listener 
= listener + + // Set socket permissions to owner+group only + // We use SO_PEERCRED for authentication, so we don't need world-readable + if err := os.Chmod(p.socketPath, 0660); err != nil { + log.Warn().Err(err).Msg("Failed to set socket permissions") + } + + log.Info().Str("socket", p.socketPath).Msg("Unix socket ready") + + // Start accepting connections + go p.acceptConnections() + + return nil +} + +// Stop shuts down the proxy +func (p *Proxy) Stop() { + if p.listener != nil { + p.listener.Close() + os.Remove(p.socketPath) + } +} + +// acceptConnections handles incoming socket connections +func (p *Proxy) acceptConnections() { + for { + conn, err := p.listener.Accept() + if err != nil { + // Check if listener was closed + if opErr, ok := err.(*net.OpError); ok && opErr.Err.Error() == "use of closed network connection" { + return + } + log.Error().Err(err).Msg("Failed to accept connection") + continue + } + + go p.handleConnection(conn) + } +} + +// handleConnection processes a single RPC request with full validation and throttling +func (p *Proxy) handleConnection(conn net.Conn) { + defer conn.Close() + + // Track concurrent requests + p.metrics.queueDepth.Inc() + defer p.metrics.queueDepth.Dec() + + // Start timing for latency metrics + startTime := time.Now() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Set read deadline + if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil { + log.Warn().Err(err).Msg("Failed to set read deadline") + } + + // Extract and verify peer credentials + cred, err := extractPeerCredentials(conn) + if err != nil { + log.Warn().Err(err).Msg("Peer credentials unavailable") + p.sendErrorV2(conn, "unauthorized", "") + return + } + + // Check rate limit and concurrency + releaseLimiter, ok := p.rateLimiter.allow(peerID{uid: cred.uid, pid: cred.pid}) + if !ok { + p.metrics.rateLimitHits.Inc() + log.Warn(). + Uint32("uid", cred.uid). + Uint32("pid", cred.pid). 
+ Msg("Rate limit exceeded") + p.sendErrorV2(conn, "rate limit exceeded", "") + return + } + defer releaseLimiter() + + // Limit request size and decode + lr := io.LimitReader(conn, maxRequestBytes) + decoder := json.NewDecoder(lr) + decoder.DisallowUnknownFields() + + var req RPCRequest + if err := decoder.Decode(&req); err != nil { + if errors.Is(err, io.EOF) || err.Error() == "EOF" { + p.sendErrorV2(conn, "empty request", "") + return + } + p.sendErrorV2(conn, "invalid request format", "") + return + } + + // Check if payload was too large + if decoder.More() { + p.sendErrorV2(conn, "payload too large", req.CorrelationID) + return + } + + // Sanitize correlation ID + req.CorrelationID = sanitizeCorrelationID(req.CorrelationID) + + // Create contextual logger + logger := log.With(). + Str("corr_id", req.CorrelationID). + Uint32("uid", cred.uid). + Uint32("pid", cred.pid). + Str("method", req.Method). + Logger() + + // Prepare response + resp := RPCResponse{ + CorrelationID: req.CorrelationID, + Success: false, + } + + // Find handler + handler := p.router[req.Method] + if handler == nil { + resp.Error = "unknown method" + logger.Warn().Msg("Unknown method") + p.sendResponse(conn, resp) + return + } + + // Execute handler + result, err := handler(ctx, &req, logger) + if err != nil { + resp.Error = err.Error() + logger.Warn().Err(err).Msg("Handler failed") + p.sendResponse(conn, resp) + // Record failed request + p.metrics.rpcRequests.WithLabelValues(req.Method, "error").Inc() + p.metrics.rpcLatency.WithLabelValues(req.Method).Observe(time.Since(startTime).Seconds()) + return + } + + // Success + resp.Success = true + resp.Data = result + logger.Info().Msg("Request completed") + p.sendResponse(conn, resp) + + // Record successful request + p.metrics.rpcRequests.WithLabelValues(req.Method, "success").Inc() + p.metrics.rpcLatency.WithLabelValues(req.Method).Observe(time.Since(startTime).Seconds()) +} + +// sendError sends an error response (legacy function) +func (p 
*Proxy) sendError(conn net.Conn, message string) { + resp := RPCResponse{ + Success: false, + Error: message, + } + encoder := json.NewEncoder(conn) + encoder.Encode(resp) +} + +// sendErrorV2 sends an error response with correlation ID +func (p *Proxy) sendErrorV2(conn net.Conn, message, correlationID string) { + resp := RPCResponse{ + CorrelationID: correlationID, + Success: false, + Error: message, + } + encoder := json.NewEncoder(conn) + encoder.Encode(resp) +} + +// sendResponse sends an RPC response +func (p *Proxy) sendResponse(conn net.Conn, resp RPCResponse) { + encoder := json.NewEncoder(conn) + if err := encoder.Encode(resp); err != nil { + log.Error().Err(err).Msg("Failed to encode RPC response") + } +} + +// handleGetStatus returns proxy status +func (p *Proxy) handleGetStatus(req RPCRequest) RPCResponse { + pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub") + pubKey, err := os.ReadFile(pubKeyPath) + if err != nil { + return RPCResponse{ + Success: false, + Error: fmt.Sprintf("failed to read public key: %v", err), + } + } + + return RPCResponse{ + Success: true, + Data: map[string]interface{}{ + "version": Version, + "public_key": string(pubKey), + "ssh_dir": p.sshKeyPath, + }, + } +} + +// ensureSSHKeypair generates SSH keypair if it doesn't exist +func (p *Proxy) ensureSSHKeypair() error { + privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519") + pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub") + + // Check if keypair already exists + if _, err := os.Stat(privKeyPath); err == nil { + if _, err := os.Stat(pubKeyPath); err == nil { + log.Info().Msg("SSH keypair already exists") + return nil + } + } + + log.Info().Msg("Generating new SSH keypair") + + // Generate ed25519 keypair using ssh-keygen + cmd := fmt.Sprintf("ssh-keygen -t ed25519 -f %s -N '' -C 'pulse-sensor-proxy'", privKeyPath) + if output, err := execCommand(cmd); err != nil { + return fmt.Errorf("failed to generate SSH keypair: %w (output: %s)", err, output) + } + + 
log.Info().Str("path", privKeyPath).Msg("SSH keypair generated") + return nil +} + +// handleEnsureClusterKeys discovers cluster nodes and pushes SSH keys +func (p *Proxy) handleEnsureClusterKeys(req RPCRequest) RPCResponse { + // Check if we're on a Proxmox host + if !isProxmoxHost() { + return RPCResponse{ + Success: false, + Error: "not running on Proxmox host - cannot discover cluster", + } + } + + // Discover cluster nodes + nodes, err := discoverClusterNodes() + if err != nil { + return RPCResponse{ + Success: false, + Error: fmt.Sprintf("failed to discover cluster: %v", err), + } + } + + log.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes") + + // Push SSH key to each node + results := make(map[string]interface{}) + successCount := 0 + for _, node := range nodes { + log.Info().Str("node", node).Msg("Pushing SSH key to node") + if err := p.pushSSHKey(node); err != nil { + log.Error().Err(err).Str("node", node).Msg("Failed to push SSH key") + results[node] = map[string]interface{}{ + "success": false, + "error": err.Error(), + } + } else { + log.Info().Str("node", node).Msg("SSH key pushed successfully") + results[node] = map[string]interface{}{ + "success": true, + } + successCount++ + } + } + + return RPCResponse{ + Success: true, + Data: map[string]interface{}{ + "nodes": nodes, + "results": results, + "success_count": successCount, + "total_count": len(nodes), + }, + } +} + +// handleRegisterNodes returns discovered nodes +func (p *Proxy) handleRegisterNodes(req RPCRequest) RPCResponse { + // Check if we're on a Proxmox host + if !isProxmoxHost() { + return RPCResponse{ + Success: false, + Error: "not running on Proxmox host", + } + } + + // Discover cluster nodes + nodes, err := discoverClusterNodes() + if err != nil { + return RPCResponse{ + Success: false, + Error: fmt.Sprintf("failed to discover nodes: %v", err), + } + } + + // Test SSH connectivity to each node + nodeStatus := make([]map[string]interface{}, 0, len(nodes)) + for _, node := 
range nodes { + status := map[string]interface{}{ + "name": node, + } + + if err := p.testSSHConnection(node); err != nil { + status["ssh_ready"] = false + status["error"] = err.Error() + } else { + status["ssh_ready"] = true + } + + nodeStatus = append(nodeStatus, status) + } + + return RPCResponse{ + Success: true, + Data: map[string]interface{}{ + "nodes": nodeStatus, + }, + } +} + +// handleGetTemperature fetches temperature data from a node via SSH +func (p *Proxy) handleGetTemperature(req RPCRequest) RPCResponse { + // Extract node parameter + nodeParam, ok := req.Params["node"] + if !ok { + return RPCResponse{ + Success: false, + Error: "missing 'node' parameter", + } + } + + node, ok := nodeParam.(string) + if !ok { + return RPCResponse{ + Success: false, + Error: "'node' parameter must be a string", + } + } + + // Fetch temperature data + tempData, err := p.getTemperatureViaSSH(node) + if err != nil { + return RPCResponse{ + Success: false, + Error: fmt.Sprintf("failed to get temperatures: %v", err), + } + } + + return RPCResponse{ + Success: true, + Data: map[string]interface{}{ + "node": node, + "temperature": tempData, + }, + } +} + +// New V2 handlers with context and structured logging + +// handleGetStatusV2 returns proxy status with context support +func (p *Proxy) handleGetStatusV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) { + pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub") + pubKey, err := os.ReadFile(pubKeyPath) + if err != nil { + return nil, fmt.Errorf("failed to read public key: %w", err) + } + + logger.Info().Msg("Status request served") + return map[string]interface{}{ + "version": Version, + "public_key": string(pubKey), + "ssh_dir": p.sshKeyPath, + }, nil +} + +// handleEnsureClusterKeysV2 discovers cluster nodes and pushes SSH keys with validation +func (p *Proxy) handleEnsureClusterKeysV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) { + // Check if 
we're on a Proxmox host + if !isProxmoxHost() { + return nil, fmt.Errorf("not running on Proxmox host - cannot discover cluster") + } + + // Check for optional key_dir parameter (for key rotation) + keyDir := p.sshKeyPath // default + if keyDirParam, ok := req.Params["key_dir"]; ok { + if keyDirStr, ok := keyDirParam.(string); ok && keyDirStr != "" { + keyDir = keyDirStr + logger.Info().Str("key_dir", keyDir).Msg("Using custom key directory for rotation") + } + } + + // Discover cluster nodes + nodes, err := discoverClusterNodes() + if err != nil { + return nil, fmt.Errorf("failed to discover cluster: %w", err) + } + + logger.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes") + + // Push SSH key to each node + results := make(map[string]interface{}) + successCount := 0 + for _, node := range nodes { + // Validate node name + if err := validateNodeName(node); err != nil { + logger.Warn().Str("node", node).Msg("Invalid node name format") + results[node] = map[string]interface{}{ + "success": false, + "error": "invalid node name", + } + continue + } + + logger.Info().Str("node", node).Str("key_dir", keyDir).Msg("Pushing SSH key to node") + if err := p.pushSSHKeyFrom(node, keyDir); err != nil { + logger.Error().Err(err).Str("node", node).Msg("Failed to push SSH key") + results[node] = map[string]interface{}{ + "success": false, + "error": err.Error(), + } + } else { + logger.Info().Str("node", node).Msg("SSH key pushed successfully") + results[node] = map[string]interface{}{ + "success": true, + } + successCount++ + } + } + + return map[string]interface{}{ + "nodes": nodes, + "results": results, + "success_count": successCount, + "total_count": len(nodes), + }, nil +} + +// handleRegisterNodesV2 returns discovered nodes with validation +func (p *Proxy) handleRegisterNodesV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) { + // Check if we're on a Proxmox host + if !isProxmoxHost() { + return nil, fmt.Errorf("not running on 
Proxmox host") + } + + // Discover cluster nodes + nodes, err := discoverClusterNodes() + if err != nil { + return nil, fmt.Errorf("failed to discover nodes: %w", err) + } + + // Test SSH connectivity to each node + nodeStatus := make([]map[string]interface{}, 0, len(nodes)) + for _, node := range nodes { + status := map[string]interface{}{ + "name": node, + } + + // Validate node name + if err := validateNodeName(node); err != nil { + status["ssh_ready"] = false + status["error"] = "invalid node name" + nodeStatus = append(nodeStatus, status) + continue + } + + if err := p.testSSHConnection(node); err != nil { + status["ssh_ready"] = false + status["error"] = err.Error() + } else { + status["ssh_ready"] = true + } + + nodeStatus = append(nodeStatus, status) + } + + logger.Info().Int("node_count", len(nodeStatus)).Msg("Node discovery completed") + return map[string]interface{}{ + "nodes": nodeStatus, + }, nil +} + +// handleGetTemperatureV2 fetches temperature data with concurrency control and validation +func (p *Proxy) handleGetTemperatureV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) { + // Extract node parameter + nodeParam, ok := req.Params["node"] + if !ok { + return nil, fmt.Errorf("missing 'node' parameter") + } + + node, ok := nodeParam.(string) + if !ok { + return nil, fmt.Errorf("'node' parameter must be a string") + } + + // Trim and validate node name + node = strings.TrimSpace(node) + if err := validateNodeName(node); err != nil { + logger.Warn().Str("node", node).Msg("Invalid node name format") + return nil, fmt.Errorf("invalid node name") + } + + // Acquire per-node concurrency lock (prevents multiple simultaneous requests to same node) + releaseNode := p.nodeGate.acquire(node) + defer releaseNode() + + logger.Debug().Str("node", node).Msg("Fetching temperature via SSH") + + // Fetch temperature data + tempData, err := p.getTemperatureViaSSH(node) + if err != nil { + logger.Warn().Err(err).Str("node", 
node).Msg("Failed to get temperatures") + return nil, fmt.Errorf("failed to get temperatures: %w", err) + } + + logger.Info().Str("node", node).Msg("Temperature data fetched successfully") + return map[string]interface{}{ + "node": node, + "temperature": tempData, + }, nil +} diff --git a/cmd/pulse-sensor-proxy/metrics.go b/cmd/pulse-sensor-proxy/metrics.go new file mode 100644 index 000000000..48955f1a9 --- /dev/null +++ b/cmd/pulse-sensor-proxy/metrics.go @@ -0,0 +1,167 @@ +package main + +import ( + "context" + "net" + "net/http" + "strings" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/rs/zerolog/log" +) + +const defaultMetricsAddr = "127.0.0.1:9127" + +// ProxyMetrics holds Prometheus metrics for the proxy +type ProxyMetrics struct { + rpcRequests *prometheus.CounterVec + rpcLatency *prometheus.HistogramVec + sshRequests *prometheus.CounterVec + sshLatency *prometheus.HistogramVec + queueDepth prometheus.Gauge + rateLimitHits prometheus.Counter + buildInfo *prometheus.GaugeVec + server *http.Server + registry *prometheus.Registry +} + +// NewProxyMetrics creates and registers all metrics +func NewProxyMetrics(version string) *ProxyMetrics { + reg := prometheus.NewRegistry() + + pm := &ProxyMetrics{ + rpcRequests: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "pulse_proxy_rpc_requests_total", + Help: "Total RPC requests handled by method and result.", + }, + []string{"method", "result"}, + ), + rpcLatency: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "pulse_proxy_rpc_latency_seconds", + Help: "RPC handler latency.", + Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2.5, 5}, + }, + []string{"method"}, + ), + sshRequests: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "pulse_proxy_ssh_requests_total", + Help: "SSH command executions by node and result.", + }, + []string{"node", "result"}, + ), + sshLatency: 
prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "pulse_proxy_ssh_latency_seconds", + Help: "SSH command latency per node.", + Buckets: []float64{0.1, 0.5, 1, 2.5, 5, 10, 30}, + }, + []string{"node"}, + ), + queueDepth: prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "pulse_proxy_queue_depth", + Help: "Concurrent RPC requests being processed.", + }, + ), + rateLimitHits: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "pulse_proxy_rate_limit_hits_total", + Help: "Number of RPC requests rejected due to rate limiting.", + }, + ), + buildInfo: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "pulse_proxy_build_info", + Help: "Proxy build metadata.", + }, + []string{"version"}, + ), + registry: reg, + } + + reg.MustRegister( + pm.rpcRequests, + pm.rpcLatency, + pm.sshRequests, + pm.sshLatency, + pm.queueDepth, + pm.rateLimitHits, + pm.buildInfo, + ) + + pm.buildInfo.WithLabelValues(version).Set(1) + + return pm +} + +// Start starts the metrics HTTP server on the specified address +func (m *ProxyMetrics) Start(addr string) error { + if addr == "" || strings.ToLower(addr) == "disabled" { + log.Info().Msg("Metrics server disabled") + return nil + } + + if addr == "default" { + addr = defaultMetricsAddr + } + + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})) + + ln, err := net.Listen("tcp", addr) + if err != nil { + return err + } + + m.server = &http.Server{ + Addr: addr, + Handler: mux, + ReadHeaderTimeout: 5 * time.Second, + } + + go func() { + if err := m.server.Serve(ln); err != nil && err != http.ErrServerClosed { + log.Error().Err(err).Str("addr", addr).Msg("Metrics server stopped unexpectedly") + } + }() + + log.Info().Str("addr", addr).Msg("Metrics server started") + return nil +} + +// Shutdown gracefully shuts down the metrics server +func (m *ProxyMetrics) Shutdown(ctx context.Context) { + if m.server != nil { + _ = m.server.Shutdown(ctx) + } +} + +// 
sanitizeNodeLabel converts a node name into a safe Prometheus label value +func sanitizeNodeLabel(node string) string { + const maxLen = 63 + safe := strings.Builder{} + safe.Grow(len(node)) + + for _, r := range strings.ToLower(node) { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' || r == '.' { + safe.WriteRune(r) + } else { + safe.WriteRune('_') + } + } + + out := safe.String() + if len(out) > maxLen { + out = out[:maxLen] + } + if out == "" { + out = "unknown" + } + + return out +} diff --git a/cmd/pulse-sensor-proxy/ssh.go b/cmd/pulse-sensor-proxy/ssh.go new file mode 100644 index 000000000..69bbdf387 --- /dev/null +++ b/cmd/pulse-sensor-proxy/ssh.go @@ -0,0 +1,242 @@ +package main + +import ( + "bytes" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/rs/zerolog/log" +) + +// execCommand executes a shell command and returns output +func execCommand(cmd string) (string, error) { + out, err := exec.Command("sh", "-c", cmd).CombinedOutput() + return string(out), err +} + +// getPublicKey reads the SSH public key from the default directory +func (p *Proxy) getPublicKey() (string, error) { + return p.getPublicKeyFrom(p.sshKeyPath) +} + +// getPublicKeyFrom reads the SSH public key from a specific directory +func (p *Proxy) getPublicKeyFrom(keyDir string) (string, error) { + pubKeyPath := filepath.Join(keyDir, "id_ed25519.pub") + data, err := os.ReadFile(pubKeyPath) + if err != nil { + return "", err + } + return strings.TrimSpace(string(data)), nil +} + +// buildAuthorizedKey constructs an authorized_keys entry with from= IP restrictions +func (p *Proxy) buildAuthorizedKey(pubKey string) (string, error) { + subnets := p.config.AllowedSourceSubnets + if len(subnets) == 0 { + return "", fmt.Errorf("no allowed source subnets configured or detected") + } + + // Build from= clause with all allowed subnets + fromClause := fmt.Sprintf(`from="%s"`, strings.Join(subnets, ",")) + + // Comment helps identify and 
upgrade this key later + const comment = "pulse-sensor-proxy" + + // Forced command with all restrictions + const forced = `command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty` + + // Format: from="...",command="...",no-* ssh-rsa AAAA... pulse-sensor-proxy + return fmt.Sprintf(`%s,%s %s %s`, fromClause, forced, pubKey, comment), nil +} + +// pushSSHKeyFrom pushes a public key from a specific directory to a node +func (p *Proxy) pushSSHKeyFrom(nodeHost, keyDir string) error { + startTime := time.Now() + nodeLabel := sanitizeNodeLabel(nodeHost) + + pubKey, err := p.getPublicKeyFrom(keyDir) + if err != nil { + p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return fmt.Errorf("failed to get public key from %s: %w", keyDir, err) + } + + // Build the restricted authorized_keys entry + entry, err := p.buildAuthorizedKey(pubKey) + if err != nil { + p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return fmt.Errorf("failed to build authorized key: %w", err) + } + + // Check if the exact restricted entry already exists + checkCmd := fmt.Sprintf( + `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "grep -F '%s' /root/.ssh/authorized_keys 2>/dev/null"`, + nodeHost, + entry, + ) + + if output, _ := execCommand(checkCmd); strings.Contains(output, entry) { + log.Debug().Str("node", nodeHost).Msg("SSH key already present with from= restrictions") + p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return nil + } + + // Remove old pulse-temp-proxy and pulse-sensor-proxy entries (for upgrade path) + removeOldCmd := fmt.Sprintf( + `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "mkdir -p /root/.ssh && 
chmod 700 /root/.ssh && grep -v -e 'pulse-temp-proxy$' -e 'pulse-sensor-proxy$' /root/.ssh/authorized_keys > /root/.ssh/authorized_keys.tmp 2>/dev/null || touch /root/.ssh/authorized_keys.tmp"`, + nodeHost, + ) + + if _, err := execCommand(removeOldCmd); err != nil { + p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return fmt.Errorf("failed to prepare authorized_keys on %s: %w", nodeHost, err) + } + + // Add the new restricted key and atomically replace the file + addCmd := fmt.Sprintf( + `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "echo '%s' >> /root/.ssh/authorized_keys.tmp && mv /root/.ssh/authorized_keys.tmp /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"`, + nodeHost, + entry, + ) + + if _, err := execCommand(addCmd); err != nil { + p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return fmt.Errorf("failed to add SSH key to %s: %w", nodeHost, err) + } + + log.Info(). + Str("node", nodeHost). + Str("key_dir", keyDir). + Strs("allowed_subnets", p.config.AllowedSourceSubnets). 
+ Msg("SSH key installed with from= IP restrictions") + + p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return nil +} + +// pushSSHKey adds the proxy's public key to a node's authorized_keys with IP restrictions +// Automatically upgrades old keys without from= restrictions +func (p *Proxy) pushSSHKey(nodeHost string) error { + return p.pushSSHKeyFrom(nodeHost, p.sshKeyPath) +} + +// testSSHConnection verifies SSH connectivity to a node +func (p *Proxy) testSSHConnection(nodeHost string) error { + startTime := time.Now() + nodeLabel := sanitizeNodeLabel(nodeHost) + + privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519") + cmd := fmt.Sprintf( + `ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s "echo test"`, + privKeyPath, + nodeHost, + ) + + output, err := execCommand(cmd) + if err != nil { + p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return fmt.Errorf("SSH test failed: %w (output: %s)", err, output) + } + + // The forced command will run "sensors -j" instead of "echo test" + // So we should get JSON output, not "test" + // For now, just check that connection succeeded + p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return nil +} + +// getTemperatureViaSSH fetches temperature data from a node +func (p *Proxy) getTemperatureViaSSH(nodeHost string) (string, error) { + startTime := time.Now() + nodeLabel := sanitizeNodeLabel(nodeHost) + + privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519") + + // Since we use ForceCommand="sensors -j", any SSH command will run sensors + // We don't need to specify the command + cmd := fmt.Sprintf( + `ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s ""`, + 
privKeyPath, + nodeHost, + ) + + output, err := execCommand(cmd) + if err != nil { + p.metrics.sshRequests.WithLabelValues(nodeLabel, "error").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return "", fmt.Errorf("failed to fetch temperatures: %w", err) + } + + p.metrics.sshRequests.WithLabelValues(nodeLabel, "success").Inc() + p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds()) + return output, nil +} + +// discoverClusterNodes discovers all nodes in the Proxmox cluster +func discoverClusterNodes() ([]string, error) { + // Check if pvecm is available (only on Proxmox hosts) + if _, err := exec.LookPath("pvecm"); err != nil { + return nil, fmt.Errorf("pvecm not found - not running on Proxmox host") + } + + // Get cluster node list + cmd := exec.Command("pvecm", "nodes") + var out bytes.Buffer + cmd.Stdout = &out + if err := cmd.Run(); err != nil { + return nil, fmt.Errorf("failed to get cluster nodes: %w", err) + } + + // Parse output + // Format: + // Membership information + // ---------------------- + // Nodeid Votes Name + // 1 1 node1 + // 2 1 node2 + + var nodes []string + lines := strings.Split(out.String(), "\n") + for _, line := range lines { + fields := strings.Fields(line) + // Skip header lines and empty lines + if len(fields) < 3 { + continue + } + // Check if first field is numeric (node ID) + if fields[0][0] >= '0' && fields[0][0] <= '9' { + nodeName := fields[2] + nodes = append(nodes, nodeName) + } + } + + if len(nodes) == 0 { + return nil, fmt.Errorf("no cluster nodes found") + } + + return nodes, nil +} + +// isProxmoxHost checks if we're running on a Proxmox host +func isProxmoxHost() bool { + // Check for pvecm command + if _, err := exec.LookPath("pvecm"); err == nil { + return true + } + // Check for /etc/pve directory + if info, err := os.Stat("/etc/pve"); err == nil && info.IsDir() { + return true + } + return false +} diff --git 
a/cmd/pulse-sensor-proxy/throttle.go b/cmd/pulse-sensor-proxy/throttle.go new file mode 100644 index 000000000..946de00df --- /dev/null +++ b/cmd/pulse-sensor-proxy/throttle.go @@ -0,0 +1,140 @@ +package main + +import ( + "sync" + "time" + + "golang.org/x/time/rate" +) + +// peerID identifies a connecting process by UID+PID +type peerID struct { + uid uint32 + pid uint32 +} + +// limiterEntry holds rate limiting and concurrency controls for a peer +type limiterEntry struct { + limiter *rate.Limiter // throughput: 20/min with burst 10 + semaphore chan struct{} // concurrency: cap 10 + lastSeen time.Time +} + +// rateLimiter manages per-peer rate limits and concurrency +type rateLimiter struct { + mu sync.Mutex + entries map[peerID]*limiterEntry + quitChan chan struct{} +} + +// newRateLimiter creates a new rate limiter with cleanup loop +func newRateLimiter() *rateLimiter { + rl := &rateLimiter{ + entries: make(map[peerID]*limiterEntry), + quitChan: make(chan struct{}), + } + go rl.cleanupLoop() + return rl +} + +// allow checks if a peer is allowed to make a request and reserves a concurrency slot +// Returns a release function and whether the request is allowed +func (rl *rateLimiter) allow(id peerID) (release func(), allowed bool) { + rl.mu.Lock() + entry := rl.entries[id] + if entry == nil { + entry = &limiterEntry{ + limiter: rate.NewLimiter(rate.Every(time.Minute/20), 10), // 20/min, burst 10 + semaphore: make(chan struct{}, 10), // max 10 concurrent + } + rl.entries[id] = entry + } + entry.lastSeen = time.Now() + rl.mu.Unlock() + + // Check rate limit + if !entry.limiter.Allow() { + return nil, false + } + + // Try to acquire concurrency slot + select { + case entry.semaphore <- struct{}{}: + return func() { <-entry.semaphore }, true + default: + return nil, false // max concurrent in-flight reached + } +} + +// cleanupLoop periodically removes idle peer entries +func (rl *rateLimiter) cleanupLoop() { + ticker := time.NewTicker(5 * time.Minute) + defer 
ticker.Stop() + for { + select { + case <-ticker.C: + rl.mu.Lock() + for id, entry := range rl.entries { + if time.Since(entry.lastSeen) > 10*time.Minute { + delete(rl.entries, id) + } + } + rl.mu.Unlock() + case <-rl.quitChan: + return + } + } +} + +// shutdown stops the cleanup loop +func (rl *rateLimiter) shutdown() { + close(rl.quitChan) +} + +// nodeGate controls per-node concurrency for temperature requests +type nodeGate struct { + mu sync.Mutex + inFlight map[string]*nodeLock +} + +// nodeLock tracks in-flight requests for a specific node +type nodeLock struct { + refCount int + guard chan struct{} +} + +// newNodeGate creates a new node concurrency gate +func newNodeGate() *nodeGate { + return &nodeGate{ + inFlight: make(map[string]*nodeLock), + } +} + +// acquire gets exclusive access to make requests to a node +// Returns a release function that must be called when done +func (g *nodeGate) acquire(node string) func() { + g.mu.Lock() + lock := g.inFlight[node] + if lock == nil { + lock = &nodeLock{ + guard: make(chan struct{}, 1), // single slot = only one SSH fetch per node + } + g.inFlight[node] = lock + } + lock.refCount++ + g.mu.Unlock() + + // Wait for exclusive access + lock.guard <- struct{}{} + + // Return release function + return func() { + <-lock.guard + g.mu.Lock() + lock.refCount-- + if lock.refCount == 0 { + delete(g.inFlight, node) + } + g.mu.Unlock() + } +} diff --git a/cmd/pulse-sensor-proxy/validation.go b/cmd/pulse-sensor-proxy/validation.go new file mode 100644 index 000000000..b0fbe13aa --- /dev/null +++ b/cmd/pulse-sensor-proxy/validation.go @@ -0,0 +1,33 @@ +package main + +import ( + "fmt" + "regexp" + + "github.com/google/uuid" +) + +var ( + // nodeNameRegex validates node names (alphanumeric, dots, underscores, hyphens, 1-64 chars) + nodeNameRegex = regexp.MustCompile(`^[a-zA-Z0-9._-]{1,64}$`) +) + +// sanitizeCorrelationID validates and sanitizes a correlation ID +// Returns a valid UUID, generating a new one if input is missing 
or invalid +func sanitizeCorrelationID(id string) string { + if id == "" { + return uuid.NewString() + } + if _, err := uuid.Parse(id); err != nil { + return uuid.NewString() + } + return id +} + +// validateNodeName checks if a node name is in valid format +func validateNodeName(name string) error { + if !nodeNameRegex.MatchString(name) { + return fmt.Errorf("invalid node name") + } + return nil +} diff --git a/cmd/pulse-temp-proxy/main.go b/cmd/pulse-temp-proxy/main.go deleted file mode 100644 index 504ac26fa..000000000 --- a/cmd/pulse-temp-proxy/main.go +++ /dev/null @@ -1,434 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "net" - "os" - "os/signal" - "path/filepath" - "syscall" - - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" -) - -// Version information (set at build time with -ldflags) -var ( - Version = "dev" - BuildTime = "unknown" - GitCommit = "unknown" -) - -const ( - defaultSocketPath = "/run/pulse-temp-proxy/pulse-temp-proxy.sock" - defaultSSHKeyPath = "/var/lib/pulse-temp-proxy/ssh" -) - -var rootCmd = &cobra.Command{ - Use: "pulse-temp-proxy", - Short: "Pulse Temperature Proxy - Secure SSH bridge for containerized Pulse", - Long: `Temperature monitoring proxy that keeps SSH keys on the host and exposes temperature data via unix socket`, - Version: Version, - Run: func(cmd *cobra.Command, args []string) { - runProxy() - }, -} - -var versionCmd = &cobra.Command{ - Use: "version", - Short: "Print version information", - Run: func(cmd *cobra.Command, args []string) { - fmt.Printf("pulse-temp-proxy %s\n", Version) - if BuildTime != "unknown" { - fmt.Printf("Built: %s\n", BuildTime) - } - if GitCommit != "unknown" { - fmt.Printf("Commit: %s\n", GitCommit) - } - }, -} - -func init() { - rootCmd.AddCommand(versionCmd) -} - -func main() { - if err := rootCmd.Execute(); err != nil { - fmt.Fprintf(os.Stderr, "Error: %v\n", err) - os.Exit(1) - } -} - -// Proxy manages the temperature monitoring proxy -type Proxy 
struct { - socketPath string - sshKeyPath string - listener net.Listener -} - -// RPC request types -const ( - RPCEnsureClusterKeys = "ensure_cluster_keys" - RPCRegisterNodes = "register_nodes" - RPCGetTemperature = "get_temperature" - RPCGetStatus = "get_status" -) - -// RPCRequest represents a request from Pulse -type RPCRequest struct { - Method string `json:"method"` - Params map[string]interface{} `json:"params"` -} - -// RPCResponse represents a response to Pulse -type RPCResponse struct { - Success bool `json:"success"` - Data interface{} `json:"data,omitempty"` - Error string `json:"error,omitempty"` -} - -func runProxy() { - // Initialize logger - zerolog.TimeFieldFormat = zerolog.TimeFormatUnix - log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) - - socketPath := os.Getenv("PULSE_TEMP_PROXY_SOCKET") - if socketPath == "" { - socketPath = defaultSocketPath - } - - sshKeyPath := os.Getenv("PULSE_TEMP_PROXY_SSH_DIR") - if sshKeyPath == "" { - sshKeyPath = defaultSSHKeyPath - } - - log.Info(). - Str("socket", socketPath). - Str("ssh_key_dir", sshKeyPath). 
- Msg("Starting pulse-temp-proxy") - - proxy := &Proxy{ - socketPath: socketPath, - sshKeyPath: sshKeyPath, - } - - if err := proxy.Start(); err != nil { - log.Fatal().Err(err).Msg("Failed to start proxy") - } - - // Setup signal handlers - sigChan := make(chan os.Signal, 1) - signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) - - <-sigChan - log.Info().Msg("Shutting down proxy...") - proxy.Stop() - log.Info().Msg("Proxy stopped") -} - -// Start initializes and starts the proxy -func (p *Proxy) Start() error { - // Create SSH key directory if it doesn't exist - if err := os.MkdirAll(p.sshKeyPath, 0700); err != nil { - return fmt.Errorf("failed to create SSH key directory: %w", err) - } - - // Ensure SSH keypair exists - if err := p.ensureSSHKeypair(); err != nil { - return fmt.Errorf("failed to ensure SSH keypair: %w", err) - } - - // Remove existing socket if it exists - if err := os.RemoveAll(p.socketPath); err != nil { - return fmt.Errorf("failed to remove existing socket: %w", err) - } - - // Create socket directory if needed - socketDir := filepath.Dir(p.socketPath) - if err := os.MkdirAll(socketDir, 0755); err != nil { - return fmt.Errorf("failed to create socket directory: %w", err) - } - - // Create unix socket listener - listener, err := net.Listen("unix", p.socketPath) - if err != nil { - return fmt.Errorf("failed to create unix socket: %w", err) - } - p.listener = listener - - // Set socket permissions to owner+group only - // We use SO_PEERCRED for authentication, so we don't need world-readable - if err := os.Chmod(p.socketPath, 0660); err != nil { - log.Warn().Err(err).Msg("Failed to set socket permissions") - } - - log.Info().Str("socket", p.socketPath).Msg("Unix socket ready") - - // Start accepting connections - go p.acceptConnections() - - return nil -} - -// Stop shuts down the proxy -func (p *Proxy) Stop() { - if p.listener != nil { - p.listener.Close() - os.Remove(p.socketPath) - } -} - -// acceptConnections handles incoming socket 
connections -func (p *Proxy) acceptConnections() { - for { - conn, err := p.listener.Accept() - if err != nil { - // Check if listener was closed - if opErr, ok := err.(*net.OpError); ok && opErr.Err.Error() == "use of closed network connection" { - return - } - log.Error().Err(err).Msg("Failed to accept connection") - continue - } - - go p.handleConnection(conn) - } -} - -// handleConnection processes a single RPC request -func (p *Proxy) handleConnection(conn net.Conn) { - defer conn.Close() - - // Verify peer credentials (SO_PEERCRED authentication) - if err := verifyPeerCredentials(conn); err != nil { - log.Warn().Err(err).Msg("Unauthorized connection attempt") - p.sendError(conn, "unauthorized") - return - } - - // Decode request - var req RPCRequest - decoder := json.NewDecoder(conn) - if err := decoder.Decode(&req); err != nil { - log.Error().Err(err).Msg("Failed to decode RPC request") - p.sendError(conn, "invalid request format") - return - } - - log.Debug().Str("method", req.Method).Msg("Received RPC request") - - // Route to handler - var resp RPCResponse - switch req.Method { - case RPCGetStatus: - resp = p.handleGetStatus(req) - case RPCEnsureClusterKeys: - resp = p.handleEnsureClusterKeys(req) - case RPCRegisterNodes: - resp = p.handleRegisterNodes(req) - case RPCGetTemperature: - resp = p.handleGetTemperature(req) - default: - resp = RPCResponse{ - Success: false, - Error: fmt.Sprintf("unknown method: %s", req.Method), - } - } - - // Send response - encoder := json.NewEncoder(conn) - if err := encoder.Encode(resp); err != nil { - log.Error().Err(err).Msg("Failed to encode RPC response") - } -} - -// sendError sends an error response -func (p *Proxy) sendError(conn net.Conn, message string) { - resp := RPCResponse{ - Success: false, - Error: message, - } - encoder := json.NewEncoder(conn) - encoder.Encode(resp) -} - -// handleGetStatus returns proxy status -func (p *Proxy) handleGetStatus(req RPCRequest) RPCResponse { - pubKeyPath := 
filepath.Join(p.sshKeyPath, "id_ed25519.pub") - pubKey, err := os.ReadFile(pubKeyPath) - if err != nil { - return RPCResponse{ - Success: false, - Error: fmt.Sprintf("failed to read public key: %v", err), - } - } - - return RPCResponse{ - Success: true, - Data: map[string]interface{}{ - "version": Version, - "public_key": string(pubKey), - "ssh_dir": p.sshKeyPath, - }, - } -} - -// ensureSSHKeypair generates SSH keypair if it doesn't exist -func (p *Proxy) ensureSSHKeypair() error { - privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519") - pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub") - - // Check if keypair already exists - if _, err := os.Stat(privKeyPath); err == nil { - if _, err := os.Stat(pubKeyPath); err == nil { - log.Info().Msg("SSH keypair already exists") - return nil - } - } - - log.Info().Msg("Generating new SSH keypair") - - // Generate ed25519 keypair using ssh-keygen - cmd := fmt.Sprintf("ssh-keygen -t ed25519 -f %s -N '' -C 'pulse-temp-proxy'", privKeyPath) - if output, err := execCommand(cmd); err != nil { - return fmt.Errorf("failed to generate SSH keypair: %w (output: %s)", err, output) - } - - log.Info().Str("path", privKeyPath).Msg("SSH keypair generated") - return nil -} - -// handleEnsureClusterKeys discovers cluster nodes and pushes SSH keys -func (p *Proxy) handleEnsureClusterKeys(req RPCRequest) RPCResponse { - // Check if we're on a Proxmox host - if !isProxmoxHost() { - return RPCResponse{ - Success: false, - Error: "not running on Proxmox host - cannot discover cluster", - } - } - - // Discover cluster nodes - nodes, err := discoverClusterNodes() - if err != nil { - return RPCResponse{ - Success: false, - Error: fmt.Sprintf("failed to discover cluster: %v", err), - } - } - - log.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes") - - // Push SSH key to each node - results := make(map[string]interface{}) - successCount := 0 - for _, node := range nodes { - log.Info().Str("node", node).Msg("Pushing SSH key to 
node") - if err := p.pushSSHKey(node); err != nil { - log.Error().Err(err).Str("node", node).Msg("Failed to push SSH key") - results[node] = map[string]interface{}{ - "success": false, - "error": err.Error(), - } - } else { - log.Info().Str("node", node).Msg("SSH key pushed successfully") - results[node] = map[string]interface{}{ - "success": true, - } - successCount++ - } - } - - return RPCResponse{ - Success: true, - Data: map[string]interface{}{ - "nodes": nodes, - "results": results, - "success_count": successCount, - "total_count": len(nodes), - }, - } -} - -// handleRegisterNodes returns discovered nodes -func (p *Proxy) handleRegisterNodes(req RPCRequest) RPCResponse { - // Check if we're on a Proxmox host - if !isProxmoxHost() { - return RPCResponse{ - Success: false, - Error: "not running on Proxmox host", - } - } - - // Discover cluster nodes - nodes, err := discoverClusterNodes() - if err != nil { - return RPCResponse{ - Success: false, - Error: fmt.Sprintf("failed to discover nodes: %v", err), - } - } - - // Test SSH connectivity to each node - nodeStatus := make([]map[string]interface{}, 0, len(nodes)) - for _, node := range nodes { - status := map[string]interface{}{ - "name": node, - } - - if err := p.testSSHConnection(node); err != nil { - status["ssh_ready"] = false - status["error"] = err.Error() - } else { - status["ssh_ready"] = true - } - - nodeStatus = append(nodeStatus, status) - } - - return RPCResponse{ - Success: true, - Data: map[string]interface{}{ - "nodes": nodeStatus, - }, - } -} - -// handleGetTemperature fetches temperature data from a node via SSH -func (p *Proxy) handleGetTemperature(req RPCRequest) RPCResponse { - // Extract node parameter - nodeParam, ok := req.Params["node"] - if !ok { - return RPCResponse{ - Success: false, - Error: "missing 'node' parameter", - } - } - - node, ok := nodeParam.(string) - if !ok { - return RPCResponse{ - Success: false, - Error: "'node' parameter must be a string", - } - } - - // Fetch 
temperature data - tempData, err := p.getTemperatureViaSSH(node) - if err != nil { - return RPCResponse{ - Success: false, - Error: fmt.Sprintf("failed to get temperatures: %v", err), - } - } - - return RPCResponse{ - Success: true, - Data: map[string]interface{}{ - "node": node, - "temperature": tempData, - }, - } -} diff --git a/cmd/pulse-temp-proxy/ssh.go b/cmd/pulse-temp-proxy/ssh.go deleted file mode 100644 index 23a9d7af8..000000000 --- a/cmd/pulse-temp-proxy/ssh.go +++ /dev/null @@ -1,161 +0,0 @@ -package main - -import ( - "bytes" - "fmt" - "os" - "os/exec" - "path/filepath" - "strings" -) - -// execCommand executes a shell command and returns output -func execCommand(cmd string) (string, error) { - out, err := exec.Command("sh", "-c", cmd).CombinedOutput() - return string(out), err -} - -// getPublicKey reads the SSH public key -func (p *Proxy) getPublicKey() (string, error) { - pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub") - data, err := os.ReadFile(pubKeyPath) - if err != nil { - return "", err - } - return strings.TrimSpace(string(data)), nil -} - -// pushSSHKey adds the proxy's public key to a node's authorized_keys with restrictions -func (p *Proxy) pushSSHKey(nodeHost string) error { - pubKey, err := p.getPublicKey() - if err != nil { - return fmt.Errorf("failed to get public key: %w", err) - } - - // Create forced command entry with restrictions - // This limits the key to only running "sensors -j" - authorizedKey := fmt.Sprintf(`command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty %s`, pubKey) - - // Build SSH command to add key to remote node - // First, check if key already exists to avoid duplicates - checkCmd := fmt.Sprintf( - `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "grep -F '%s' /root/.ssh/authorized_keys 2>/dev/null"`, - nodeHost, - pubKey, - ) - - if output, _ := execCommand(checkCmd); strings.Contains(output, pubKey) { - return nil // Key already exists - } - - // Add the 
key - addCmd := fmt.Sprintf( - `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "mkdir -p /root/.ssh && chmod 700 /root/.ssh && echo '%s' >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"`, - nodeHost, - authorizedKey, - ) - - if _, err := execCommand(addCmd); err != nil { - return fmt.Errorf("failed to add SSH key to %s: %w", nodeHost, err) - } - - return nil -} - -// testSSHConnection verifies SSH connectivity to a node -func (p *Proxy) testSSHConnection(nodeHost string) error { - privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519") - cmd := fmt.Sprintf( - `ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s "echo test"`, - privKeyPath, - nodeHost, - ) - - output, err := execCommand(cmd) - if err != nil { - return fmt.Errorf("SSH test failed: %w (output: %s)", err, output) - } - - // The forced command will run "sensors -j" instead of "echo test" - // So we should get JSON output, not "test" - // For now, just check that connection succeeded - return nil -} - -// getTemperatureViaSSH fetches temperature data from a node -func (p *Proxy) getTemperatureViaSSH(nodeHost string) (string, error) { - privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519") - - // Since we use ForceCommand="sensors -j", any SSH command will run sensors - // We don't need to specify the command - cmd := fmt.Sprintf( - `ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s ""`, - privKeyPath, - nodeHost, - ) - - output, err := execCommand(cmd) - if err != nil { - return "", fmt.Errorf("failed to fetch temperatures: %w", err) - } - - return output, nil -} - -// discoverClusterNodes discovers all nodes in the Proxmox cluster -func discoverClusterNodes() ([]string, error) { - // Check if pvecm is available (only on Proxmox hosts) - if _, err := exec.LookPath("pvecm"); err != nil { - return nil, fmt.Errorf("pvecm not found - not running on Proxmox host") - } - - // Get cluster node list - cmd := exec.Command("pvecm", "nodes") - 
var out bytes.Buffer - cmd.Stdout = &out - if err := cmd.Run(); err != nil { - return nil, fmt.Errorf("failed to get cluster nodes: %w", err) - } - - // Parse output - // Format: - // Membership information - // ---------------------- - // Nodeid Votes Name - // 1 1 node1 - // 2 1 node2 - - var nodes []string - lines := strings.Split(out.String(), "\n") - for _, line := range lines { - fields := strings.Fields(line) - // Skip header lines and empty lines - if len(fields) < 3 { - continue - } - // Check if first field is numeric (node ID) - if fields[0][0] >= '0' && fields[0][0] <= '9' { - nodeName := fields[2] - nodes = append(nodes, nodeName) - } - } - - if len(nodes) == 0 { - return nil, fmt.Errorf("no cluster nodes found") - } - - return nodes, nil -} - -// isProxmoxHost checks if we're running on a Proxmox host -func isProxmoxHost() bool { - // Check for pvecm command - if _, err := exec.LookPath("pvecm"); err == nil { - return true - } - // Check for /etc/pve directory - if info, err := os.Stat("/etc/pve"); err == nil && info.IsDir() { - return true - } - return false -} diff --git a/docs/PULSE_SENSOR_PROXY_HARDENING.md b/docs/PULSE_SENSOR_PROXY_HARDENING.md new file mode 100644 index 000000000..9e62d8ba5 --- /dev/null +++ b/docs/PULSE_SENSOR_PROXY_HARDENING.md @@ -0,0 +1,962 @@ +# Pulse Temperature Proxy - Security Hardening Guide + +## Overview + +The `pulse-sensor-proxy` is a host-side service that provides secure temperature monitoring for containerized Pulse deployments. It addresses a critical security concern: SSH keys stored inside LXC containers can be exfiltrated if the container is compromised. 
+ +**Architecture:** +- Host-side proxy runs with minimal privileges on each Proxmox node +- Containerized Pulse communicates via Unix socket (`/run/pulse-sensor-proxy/pulse-sensor-proxy.sock`) +- Proxy authenticates containers using Linux `SO_PEERCRED` (UID/PID verification) +- SSH keys never leave the host filesystem + +**Threat Model:** +- ✅ Container compromise cannot access SSH keys +- ✅ Container cannot directly SSH to cluster nodes +- ✅ Rate limiting prevents abuse via socket +- ✅ IP restrictions on SSH keys limit lateral movement +- ✅ Audit logging tracks all temperature requests + +## Prerequisites + +- Proxmox VE 7.0+ or Proxmox Backup Server 2.0+ +- LXC container running Pulse (unprivileged recommended) +- Root access to Proxmox host(s) +- `lm-sensors` installed on all nodes +- Cluster SSH access configured (root passwordless SSH between nodes) + +## Host Hardening + +### Service Account + +The proxy runs as the `pulse-sensor-proxy` user with these characteristics: +- System account (no login shell: `/usr/sbin/nologin`) +- No home directory +- Dedicated group: `pulse-sensor-proxy` +- Owns `/var/lib/pulse-sensor-proxy` and `/run/pulse-sensor-proxy` + +**Verify service account:** +```bash +# Check user exists +id pulse-sensor-proxy + +# Expected output: +# uid=XXX(pulse-sensor-proxy) gid=XXX(pulse-sensor-proxy) groups=XXX(pulse-sensor-proxy) + +# Check shell (should be /usr/sbin/nologin) +getent passwd pulse-sensor-proxy | cut -d: -f7 +``` + +### Systemd Unit Security + +The systemd unit includes comprehensive hardening directives: + +**Key security features:** +- `User=pulse-sensor-proxy` / `Group=pulse-sensor-proxy` - Unprivileged execution +- `NoNewPrivileges=true` - Prevents privilege escalation +- `ProtectSystem=strict` - Read-only `/usr`, `/boot`, `/efi` +- `ProtectHome=true` - Inaccessible `/home`, `/root`, `/run/user` +- `PrivateTmp=true` - Isolated `/tmp` and `/var/tmp` +- `SystemCallFilter=@system-service` - Restricted syscalls +- 
`CapabilityBoundingSet=` - No capabilities granted +- `RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6` - Socket restrictions + +**Verify systemd security:** +```bash +# Check service status +systemctl status pulse-sensor-proxy + +# Verify user/group +ps aux | grep pulse-sensor-proxy | grep -v grep + +# Expected: pulse-sensor-proxy user, not root + +# Check systemd security settings +systemctl show pulse-sensor-proxy | grep -E '(User=|NoNewPrivileges|ProtectSystem|CapabilityBoundingSet)' +``` + +### File Permissions + +**Critical paths and ownership:** +``` +/var/lib/pulse-sensor-proxy/ pulse-sensor-proxy:pulse-sensor-proxy 0750 +├── ssh/ pulse-sensor-proxy:pulse-sensor-proxy 0700 +│ ├── id_ed25519 pulse-sensor-proxy:pulse-sensor-proxy 0600 +│ └── id_ed25519.pub pulse-sensor-proxy:pulse-sensor-proxy 0640 +└── ssh.d/ pulse-sensor-proxy:pulse-sensor-proxy 0750 + ├── next/ pulse-sensor-proxy:pulse-sensor-proxy 0750 + └── prev/ pulse-sensor-proxy:pulse-sensor-proxy 0750 + +/run/pulse-sensor-proxy/ pulse-sensor-proxy:pulse-sensor-proxy 0775 +└── pulse-sensor-proxy.sock pulse-sensor-proxy:pulse-sensor-proxy 0777 +``` + +**Verify permissions:** +```bash +# Check base directory +ls -ld /var/lib/pulse-sensor-proxy/ +# Expected: drwxr-x--- pulse-sensor-proxy pulse-sensor-proxy + +# Check SSH keys +ls -l /var/lib/pulse-sensor-proxy/ssh/ +# Expected: +# -rw------- pulse-sensor-proxy pulse-sensor-proxy id_ed25519 +# -rw-r----- pulse-sensor-proxy pulse-sensor-proxy id_ed25519.pub + +# Check socket directory (note: 0775 for container access) +ls -ld /run/pulse-sensor-proxy/ +# Expected: drwxrwxr-x pulse-sensor-proxy pulse-sensor-proxy +``` + +**Why 0775 on socket directory?** +The socket directory needs `0775` (not `0770`) to allow the container's unprivileged UID (e.g., 1001) to traverse into the directory and access the socket. The socket itself is `0777` as access control is enforced via `SO_PEERCRED`. 
+ +## LXC Container Requirements + +### Configuration Summary + +| Setting | Value | Purpose | +|---------|-------|---------| +| `lxc.idmap` | `u 0 100000 65536`
`g 0 100000 65536` | Unprivileged UID/GID mapping | +| `lxc.apparmor.profile` | `generated` or custom | AppArmor confinement | +| `lxc.cap.drop` | `sys_admin` (optional) | Drop dangerous capabilities | +| `lxc.mount.entry` | Directory-level bind mount | Socket access from container | + +### Sample LXC Configuration + +**In `/etc/pve/lxc/.conf`:** +```ini +# Unprivileged container (required) +unprivileged: 1 + +# AppArmor profile (recommended) +lxc.apparmor.profile: generated + +# Drop CAP_SYS_ADMIN if feasible (optional but recommended) +# WARNING: May break some container management operations +lxc.cap.drop: sys_admin + +# Bind mount proxy socket directory (REQUIRED) +# Note: Directory-level mount, not socket-level (socket is recreated by systemd) +lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0 +``` + +**Key points:** +- **Directory-level mount**: Mount `/run/pulse-sensor-proxy` directory, not the socket file itself +- **Why directory mount?** Systemd recreates the socket on restart; socket-level mounts break on recreation +- **Mode 0775**: Socket directory needs group+other execute permissions for container UID traversal +- **Socket 0777**: Actual socket is world-writable; security enforced via `SO_PEERCRED` authentication + +### Runtime Verification + +**Check container is unprivileged:** +```bash +# On host +pct config | grep unprivileged +# Expected: unprivileged: 1 + +# Inside container +cat /proc/self/uid_map +# Expected: 0 100000 65536 (or similar) +# NOT: 0 0 4294967295 (privileged) +``` + +**Check AppArmor confinement:** +```bash +# Inside container +cat /proc/self/attr/current +# Expected: lxc-_ (enforcing) or similar +# NOT: unconfined +``` + +**Check namespace isolation:** +```bash +# Inside container +ls -li /proc/self/ns/ +# Each namespace should have a unique inode number, different from host +``` + +**Check capabilities:** +```bash +# Inside container +capsh --print | grep Current +# Should show limited 
capability set
# If lxc.cap.drop: sys_admin is set, CAP_SYS_ADMIN should be absent
```

**Check bind mount:**
```bash
# Inside container
ls -la /run/pulse-sensor-proxy/
# Expected: pulse-sensor-proxy.sock visible

# Test socket access (requires Pulse to attempt connection)
socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock
# Should connect (may timeout waiting for input, but connection succeeds)
```

## Key Management

### SSH Key Restrictions

All SSH keys deployed to cluster nodes include these restrictions:
- `command="sensors -j"` - Forced command (only sensors allowed)
- `from="<allowed-ips>"` - IP address restrictions
- `no-port-forwarding` - Disable port forwarding
- `no-X11-forwarding` - Disable X11 forwarding
- `no-agent-forwarding` - Disable agent forwarding
- `no-pty` - Disable PTY allocation

**Example authorized_keys entry:**
```
from="192.168.0.0/24,10.0.0.0/8",command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty ssh-ed25519 AAAA... pulse-sensor-proxy
```

**Configure allowed subnets:**

Create `/etc/pulse-sensor-proxy/config.yaml`:
```yaml
allowed_source_subnets:
  - "192.168.0.0/24" # LAN subnet
  - "10.0.0.0/8" # VPN subnet
```

Or use environment variable:
```bash
# In /etc/default/pulse-sensor-proxy (loaded by systemd)
PULSE_SENSOR_PROXY_ALLOWED_SUBNETS="192.168.0.0/24,10.0.0.0/8"
```

**Auto-detection:**
If no subnets are configured, the proxy auto-detects host IP addresses and uses them as `/32` (IPv4) or `/128` (IPv6) CIDRs. This is secure but brittle (breaks if host IP changes). Explicit configuration is recommended.

**Verify SSH restrictions:**
```bash
# On any cluster node
grep pulse-sensor-proxy /root/.ssh/authorized_keys

# Expected format:
# from="...",command="sensors -j",no-* ssh-ed25519 AAAA... 
pulse-sensor-proxy
```

### Key Rotation

**Rotation cadence:**
- Recommended: Every 90 days
- Minimum: Every 180 days
- After incident: Immediately

**Rotation workflow:**

The `pulse-sensor-proxy-rotate-keys.sh` script performs staged rotation with verification:

1. **Dry-run (recommended first):**
   ```bash
   /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run
   ```
   Shows what would happen without making changes.

2. **Perform rotation:**
   ```bash
   /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
   ```

   **What happens:**
   - Generates new Ed25519 keypair in `/var/lib/pulse-sensor-proxy/ssh.d/next/`
   - Pushes new key to all cluster nodes (via RPC `ensure_cluster_keys`)
   - Verifies SSH connectivity with new key on each node
   - Atomically swaps keys:
     - Current `/ssh/` → `/ssh.d/prev/` (backup)
     - Staging `/ssh.d/next/` → `/ssh/` (active)
   - Old keys preserved in `/ssh.d/prev/` for rollback

3. **If rotation fails, rollback:**
   ```bash
   /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback
   ```

   Restores previous keypair from `/ssh.d/prev/` and re-pushes to cluster nodes.

**Post-rotation verification:**
```bash
# Check new key timestamp
stat /var/lib/pulse-sensor-proxy/ssh/id_ed25519

# Verify all nodes have new key
for node in pve1 pve2 pve3; do
  echo "=== $node ==="
  ssh root@$node "grep pulse-sensor-proxy /root/.ssh/authorized_keys | tail -1"
done

# Test temperature fetch via proxy (raw JSON-RPC over the unix socket)
echo '{"correlation_id":"test","method":"get_temp","params":{"node":"pve1"}}' | \
  socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock \
  | jq . 
+``` + +### Automated Rotation (Optional) + +**Create systemd timer:** + +`/etc/systemd/system/pulse-sensor-proxy-key-rotation.service`: +```ini +[Unit] +Description=Rotate pulse-sensor-proxy SSH keys +After=pulse-sensor-proxy.service +Requires=pulse-sensor-proxy.service + +[Service] +Type=oneshot +ExecStart=/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh +StandardOutput=journal +StandardError=journal +``` + +`/etc/systemd/system/pulse-sensor-proxy-key-rotation.timer`: +```ini +[Unit] +Description=Rotate pulse-sensor-proxy SSH keys every 90 days +Requires=pulse-sensor-proxy-key-rotation.service + +[Timer] +OnCalendar=quarterly +RandomizedDelaySec=1h +Persistent=true + +[Install] +WantedBy=timers.target +``` + +**Enable timer:** +```bash +systemctl daemon-reload +systemctl enable --now pulse-sensor-proxy-key-rotation.timer + +# Check next run +systemctl list-timers pulse-sensor-proxy-key-rotation.timer +``` + +## Monitoring & Auditing + +### Metrics Endpoint + +The proxy exposes Prometheus metrics on `127.0.0.1:9127` by default. 
+ +**Available metrics:** +- `pulse_proxy_rpc_requests_total{method, result}` - RPC request counter +- `pulse_proxy_rpc_latency_seconds{method}` - RPC handler latency histogram +- `pulse_proxy_ssh_requests_total{node, result}` - SSH request counter per node +- `pulse_proxy_ssh_latency_seconds{node}` - SSH latency histogram per node +- `pulse_proxy_queue_depth` - Concurrent RPC requests (gauge) +- `pulse_proxy_rate_limit_hits_total` - Rejected requests due to rate limiting +- `pulse_proxy_build_info{version}` - Build metadata + +**Configure metrics address:** + +In `/etc/default/pulse-sensor-proxy`: +```bash +# Listen on all interfaces (WARNING: exposes metrics externally) +PULSE_SENSOR_PROXY_METRICS_ADDR="0.0.0.0:9127" + +# Disable metrics +PULSE_SENSOR_PROXY_METRICS_ADDR="disabled" +``` + +**Test metrics endpoint:** +```bash +curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy +``` + +### Prometheus Integration + +**Sample scrape configuration:** + +```yaml +scrape_configs: + - job_name: 'pulse-sensor-proxy' + static_configs: + - targets: + - 'pve1:9127' + - 'pve2:9127' + - 'pve3:9127' + relabel_configs: + - source_labels: [__address__] + regex: '([^:]+):.+' + target_label: instance +``` + +### Alert Rules + +**Recommended Prometheus alerts:** + +```yaml +groups: + - name: pulse-sensor-proxy + rules: + # High SSH failure rate + - alert: PulseProxySSHFailureRate + expr: | + rate(pulse_proxy_ssh_requests_total{result="error"}[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "High SSH failure rate on {{ $labels.instance }}" + description: "{{ $value | humanize }} SSH requests/sec failing" + + # Rate limiting active + - alert: PulseProxyRateLimiting + expr: | + rate(pulse_proxy_rate_limit_hits_total[5m]) > 0 + for: 5m + labels: + severity: warning + annotations: + summary: "Rate limiting active on {{ $labels.instance }}" + description: "Proxy rejecting requests due to rate limits" + + # High queue depth + - alert: PulseProxyQueueDepth 
+ expr: pulse_proxy_queue_depth > 5 + for: 5m + labels: + severity: warning + annotations: + summary: "High RPC queue depth on {{ $labels.instance }}" + description: "{{ $value }} concurrent requests (threshold: 5)" + + # Proxy down + - alert: PulseProxyDown + expr: up{job="pulse-sensor-proxy"} == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "Pulse proxy down on {{ $labels.instance }}" +``` + +### Audit Logging + +**Log format:** +All RPC requests are logged with structured fields: +- `corr_id` - Correlation ID (UUID, tracks request lifecycle) +- `uid` / `pid` - Peer credentials from `SO_PEERCRED` +- `method` - RPC method called (`get_temp`, `register_nodes`, `ensure_cluster_keys`) + +**Example log entries:** +```json +{"level":"info","corr_id":"a7f3d..","uid":1001,"pid":12345,"method":"get_temp","node":"pve1","msg":"RPC request"} +{"level":"info","corr_id":"a7f3d..","node":"pve1","latency_ms":245,"msg":"Temperature fetch successful"} +``` + +**Query logs:** +```bash +# All RPC requests in last hour +journalctl -u pulse-sensor-proxy --since "1 hour ago" -o json | \ + jq -r 'select(.corr_id != null) | [.corr_id, .uid, .method, .node] | @tsv' + +# Failed SSH requests +journalctl -u pulse-sensor-proxy --since today | grep -E '(SSH.*failed|error)' + +# Rate limit hits +journalctl -u pulse-sensor-proxy --since today | grep "rate limit" + +# Specific correlation ID +journalctl -u pulse-sensor-proxy | grep "corr_id=a7f3d" +``` + +### Rate Limiting + +**Current limits (per peer UID+PID):** +- **Rate**: 20 requests/minute (token bucket with burst) +- **Burst**: 10 requests +- **Concurrency**: 10 simultaneous requests + +**Behavior on limit exceeded:** +- Request rejected immediately (no queuing) +- `pulse_proxy_rate_limit_hits_total` metric incremented +- Log entry: `"Rate limit exceeded"` +- HTTP-like semantics: Similar to 429 Too Many Requests + +**Adjust limits:** + +Limits are hardcoded in `throttle.go`. 
To adjust, modify and rebuild: +```go +// cmd/pulse-sensor-proxy/throttle.go +const ( + requestsPerMin = 20 // Change this + requestBurst = 10 // Change this + maxConcurrent = 10 // Change this +) +``` + +Then rebuild and restart: +```bash +go build -v ./cmd/pulse-sensor-proxy +systemctl restart pulse-sensor-proxy +``` + +## Incident Response + +### Suspected Compromise Checklist + +**If the proxy or host is suspected compromised:** + +1. **Isolate immediately:** + ```bash + # Stop proxy service + systemctl stop pulse-sensor-proxy + + # Block outbound SSH from host (if applicable) + iptables -A OUTPUT -p tcp --dport 22 -j REJECT + ``` + +2. **Rotate all keys:** + ```bash + # Remove compromised keys from all nodes + for node in pve1 pve2 pve3; do + ssh root@$node "sed -i '/pulse-sensor-proxy/d' /root/.ssh/authorized_keys" + done + + # Generate new keys (don't use rotation script - may be compromised) + rm -rf /var/lib/pulse-sensor-proxy/ssh* + mkdir -p /var/lib/pulse-sensor-proxy/ssh + ssh-keygen -t ed25519 -N '' -C "pulse-sensor-proxy emergency $(date -u +%Y%m%dT%H%M%SZ)" \ + -f /var/lib/pulse-sensor-proxy/ssh/id_ed25519 + chown -R pulse-sensor-proxy:pulse-sensor-proxy /var/lib/pulse-sensor-proxy/ssh + chmod 0700 /var/lib/pulse-sensor-proxy/ssh + chmod 0600 /var/lib/pulse-sensor-proxy/ssh/id_ed25519 + chmod 0640 /var/lib/pulse-sensor-proxy/ssh/id_ed25519.pub + ``` + +3. **Audit logs:** + ```bash + # Export all proxy logs + journalctl -u pulse-sensor-proxy --since "7 days ago" > /tmp/proxy-audit-$(date +%s).log + + # Look for anomalies: + # - Unusual correlation IDs + # - High rate limit hits + # - Unexpected UIDs/PIDs + # - SSH errors to unexpected nodes + ``` + +4. **Reinstall proxy:** + ```bash + # Re-run installation script + /opt/pulse/scripts/install-temp-proxy.sh + + # Verify service status + systemctl status pulse-sensor-proxy + ``` + +5. 
**Re-push keys:**
   ```bash
   # Use proxy RPC to push new keys
   /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
   ```

6. **Verify no persistence mechanisms:**
   ```bash
   # Check for unexpected systemd units
   systemctl list-units --all | grep -i proxy

   # Check for unexpected cron jobs
   crontab -l -u pulse-sensor-proxy

   # Check for unauthorized files in /var/lib/pulse-sensor-proxy
   find /var/lib/pulse-sensor-proxy -type f ! -path '*/ssh/*' ! -path '*/ssh.d/*'
   ```

### Post-Incident Hardening

After an incident, consider:
- **Audit all LXC containers** for unexpected privilege escalation
- **Review bind mounts** on all containers (check for unauthorized mounts)
- **Enable full syscall auditing** (`auditd`) on host
- **Restrict network access** to proxy metrics endpoint (firewall `127.0.0.1:9127`)
- **Implement log aggregation** (forward `journald` to central SIEM)

## Testing & Rollout

### Development Testing

Before deploying to production, verify the implementation with these safe tests:

**1. Build Verification:**
```bash
# Compile proxy
cd /opt/pulse
go build -v ./cmd/pulse-sensor-proxy

# Verify binary
./pulse-sensor-proxy version
# Expected: pulse-sensor-proxy dev (or version number)

# Check help output
./pulse-sensor-proxy --help
```

**2. Rotation Script Syntax:**
```bash
# Syntax check
bash -n /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh

# Help output
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --help

# Dry-run (requires root and socket)
sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run
```

**3. Configuration Validation:**
```bash
# Test config file parsing (keys per /etc/pulse-sensor-proxy/config.yaml)
cat > /tmp/test-config.yaml <<'EOF'
allowed_source_subnets:
  - "192.168.0.0/24"
metrics_address: "127.0.0.1:9127"
EOF
# Start the proxy against the test config and check the logs for the
# "config_file" / "subnet_count" entries confirming a successful parse
```

### Production Rollout

**Phase 1: Preparation**

1. **Backup current state:**
   ```bash
   # Back up the existing systemd unit (used by the rollback procedure)
   cp /etc/systemd/system/pulse-sensor-proxy.service \
      /etc/systemd/system/pulse-sensor-proxy.service.backup

   # Capture current service status for comparison
   systemctl status pulse-sensor-proxy > /tmp/pulse-sensor-proxy-status-before.txt
   ```

2. **Create service account:**
   ```bash
   # Run install script or manually create
   if ! 
id -u pulse-sensor-proxy >/dev/null 2>&1; then + useradd --system --user-group --no-create-home --shell /usr/sbin/nologin pulse-sensor-proxy + fi + ``` + +3. **Update file ownership:** + ```bash + chown -R pulse-sensor-proxy:pulse-sensor-proxy /var/lib/pulse-sensor-proxy/ + chmod 0750 /var/lib/pulse-sensor-proxy/ + chmod 0700 /var/lib/pulse-sensor-proxy/ssh/ + chmod 0600 /var/lib/pulse-sensor-proxy/ssh/id_ed25519 + chmod 0640 /var/lib/pulse-sensor-proxy/ssh/id_ed25519.pub + ``` + +**Phase 2: Deploy Hardened Version** + +1. **Build and install binary:** + ```bash + cd /opt/pulse + go build -v -o /tmp/pulse-sensor-proxy ./cmd/pulse-sensor-proxy + + # Verify build + /tmp/pulse-sensor-proxy version + + # Install + sudo install -m 0755 -o root -g root /tmp/pulse-sensor-proxy /usr/local/bin/pulse-sensor-proxy + ``` + +2. **Install hardened systemd unit:** + ```bash + # Copy hardened unit + sudo cp /opt/pulse/scripts/pulse-sensor-proxy.service /etc/systemd/system/ + + # Verify syntax + systemd-analyze verify /etc/systemd/system/pulse-sensor-proxy.service + + # Reload systemd + sudo systemctl daemon-reload + ``` + +3. **Update RuntimeDirectoryMode for LXC access:** + ```bash + # Ensure socket directory is accessible from container + sudo mkdir -p /etc/systemd/system/pulse-sensor-proxy.service.d/ + cat | sudo tee /etc/systemd/system/pulse-sensor-proxy.service.d/lxc-access.conf <<'EOF' +[Service] +RuntimeDirectoryMode=0775 +EOF + + sudo systemctl daemon-reload + ``` + +**Phase 3: Restart and Verify** + +1. **Restart service:** + ```bash + sudo systemctl restart pulse-sensor-proxy + + # Check status + sudo systemctl status pulse-sensor-proxy + ``` + +2. **Verify service user:** + ```bash + ps aux | grep pulse-sensor-proxy | grep -v grep + # Expected: pulse-sensor-proxy user, not root + ``` + +3. 
**Check socket permissions:** + ```bash + ls -ld /run/pulse-sensor-proxy/ + # Expected: drwxrwxr-x pulse-sensor-proxy pulse-sensor-proxy + + ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock + # Expected: srwxrwxrwx pulse-sensor-proxy pulse-sensor-proxy + ``` + +4. **Test from container:** + ```bash + # Inside LXC container running Pulse + ls -la /run/pulse-sensor-proxy/ + # Should show socket + + # Check Pulse logs for connection success + journalctl -u pulse-backend -n 50 | grep -i temperature + ``` + +**Phase 4: End-to-End Validation** + +1. **Test RPC methods:** + ```bash + # On host, test socket connectivity + echo '{"correlation_id":"test-001","method":"register_nodes","params":{}}' | \ + sudo socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock | jq . + + # Should return cluster nodes list + ``` + +2. **Test temperature fetch:** + ```bash + # From container or via socket + echo '{"correlation_id":"test-002","method":"get_temp","params":{"node":"pve1"}}' | \ + socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock | jq . + + # Should return sensors JSON data + ``` + +3. **Verify metrics endpoint:** + ```bash + curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy + + # Should show metrics like: + # pulse_proxy_rpc_requests_total{method="get_temp",result="success"} N + # pulse_proxy_queue_depth 0 + ``` + +4. **Test SSH key rotation:** + ```bash + # Dry-run first + sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run + + # Full rotation (if confident) + sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh + + # Verify all nodes updated + for node in pve1 pve2 pve3; do + ssh root@$node "tail -1 /root/.ssh/authorized_keys" + done + ``` + +5. 
**Audit logging verification:** + ```bash + # Check logs include correlation IDs and peer credentials + sudo journalctl -u pulse-sensor-proxy --since "5 minutes ago" -o json | \ + jq -r 'select(.corr_id != null) | [.corr_id, .uid, .method] | @tsv' + + # Should show structured logging with UIDs + ``` + +**Phase 5: Monitoring Setup** + +1. **Configure Prometheus scraping:** + ```yaml + # Add to prometheus.yml + scrape_configs: + - job_name: 'pulse-sensor-proxy' + static_configs: + - targets: ['localhost:9127'] + ``` + +2. **Import alert rules:** + ```bash + # Copy alert rules from docs to Prometheus alerts directory + # Reload Prometheus configuration + ``` + +3. **Verify alerts fire (optional stress test):** + ```bash + # Generate rate limit hits (test alert) + for i in {1..50}; do + echo '{"correlation_id":"stress-'$i'","method":"register_nodes","params":{}}' | \ + socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock & + done + wait + + # Check rate limit metric increased + curl -s http://127.0.0.1:9127/metrics | grep rate_limit_hits + ``` + +### Rollback Procedure + +If issues occur during rollout: + +1. **Stop new service:** + ```bash + sudo systemctl stop pulse-sensor-proxy + ``` + +2. **Restore backup:** + ```bash + sudo cp /etc/systemd/system/pulse-sensor-proxy.service.backup \ + /etc/systemd/system/pulse-sensor-proxy.service + sudo systemctl daemon-reload + ``` + +3. **Restore SSH keys (if rotated):** + ```bash + # If rotation was performed and failed + sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback + ``` + +4. **Restart with old configuration:** + ```bash + sudo systemctl restart pulse-sensor-proxy + sudo systemctl status pulse-sensor-proxy + ``` + +5. 
**Verify Pulse connectivity:** + ```bash + # Check Pulse can still fetch temperatures + # Monitor Pulse logs + ``` + +### Known Limitations + +- **No automated unit tests**: Code verification relies on build success and manual testing +- **Key rotation requires manual trigger**: Automated timer setup is optional +- **Metrics require Prometheus**: No built-in alerting without external monitoring +- **LXC bind mount required**: Container must have directory-level bind mount configured +- **Root required for rotation script**: Script needs root to run `ensure_cluster_keys` RPC + +### Future Improvements + +- Add Go unit tests for validation, throttling, and metrics logic +- Implement health check endpoint (e.g., `/health`) separate from metrics +- Add support for TLS on metrics endpoint +- Create automated integration test suite +- Add `--check` flag to rotation script for pre-flight validation +- Support for multiple LXC containers accessing same proxy instance + +## Appendix + +### Quick Verification Checklist + +**Host:** +- [ ] Service running as `pulse-sensor-proxy` user (not root) +- [ ] Keys in `/var/lib/pulse-sensor-proxy/ssh/` owned by `pulse-sensor-proxy:pulse-sensor-proxy` +- [ ] Private key permissions: `0600` +- [ ] Socket directory permissions: `0775` (not `0770`) +- [ ] Metrics endpoint accessible: `curl http://127.0.0.1:9127/metrics` + +**Container:** +- [ ] Container is unprivileged (`unprivileged: 1` in config) +- [ ] Bind mount exists: `ls /run/pulse-sensor-proxy/pulse-sensor-proxy.sock` +- [ ] AppArmor enforced: `cat /proc/self/attr/current` shows confinement +- [ ] Pulse can connect to socket (check Pulse logs) + +**SSH Keys:** +- [ ] All nodes have `pulse-sensor-proxy` key in `/root/.ssh/authorized_keys` +- [ ] Keys include `from="..."` restrictions +- [ ] Keys include `command="sensors -j"` forced command +- [ ] Keys include `no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty` + +**Monitoring:** +- [ ] Prometheus scraping metrics 
successfully +- [ ] Alerts configured for SSH failures, rate limiting, queue depth +- [ ] Logs forwarded to central logging (optional but recommended) + +### Reference Commands + +**Service Management:** +```bash +systemctl status pulse-sensor-proxy # Check service status +systemctl restart pulse-sensor-proxy # Restart service +journalctl -u pulse-sensor-proxy -f # Tail logs +``` + +**Key Management:** +```bash +/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run # Dry-run rotation +/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh # Perform rotation +/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback # Rollback +``` + +**Metrics:** +```bash +curl http://127.0.0.1:9127/metrics # Fetch all metrics +curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy # Filter proxy metrics +``` + +**Manual RPC (Testing):** +```bash +# Using socat (inline JSON) +echo '{"correlation_id":"test","method":"get_temp","params":{"node":"pve1"}}' | \ + socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock + +# Using Python (proper JSON-RPC client) +python3 <<'PY' +import json, socket, uuid +payload = { + "correlation_id": str(uuid.uuid4()), + "method": "get_temp", + "params": {"node": "pve1"} +} +with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s: + s.connect("/run/pulse-sensor-proxy/pulse-sensor-proxy.sock") + s.sendall((json.dumps(payload) + "\n").encode()) + s.shutdown(socket.SHUT_WR) + print(s.recv(65536).decode()) +PY +``` + +**Verification:** +```bash +# Check service user +ps aux | grep pulse-sensor-proxy | grep -v grep + +# Check file ownership +ls -lR /var/lib/pulse-sensor-proxy/ + +# Check bind mount in container +pct enter +ls -la /run/pulse-sensor-proxy/ + +# Check SSH keys on nodes +for node in pve1 pve2 pve3; do + echo "=== $node ===" + ssh root@$node "grep pulse-sensor-proxy /root/.ssh/authorized_keys" +done +``` + +--- + +**Document Version:** 1.0 +**Last Updated:** 2025-10-13 +**Applies To:** pulse-sensor-proxy v1.0+ 
diff --git a/docs/TEMPERATURE_MONITORING.md b/docs/TEMPERATURE_MONITORING.md index 512a63886..91e5139ed 100644 --- a/docs/TEMPERATURE_MONITORING.md +++ b/docs/TEMPERATURE_MONITORING.md @@ -18,8 +18,8 @@ Pulse can display real-time CPU and NVMe temperatures directly in your dashboard For **containerized deployments** (LXC/Docker), Pulse uses a secure proxy architecture: -1. **pulse-temp-proxy** runs on the Proxmox host (outside the container) -2. SSH keys are stored on the host filesystem (`/var/lib/pulse-temp-proxy/ssh/`) +1. **pulse-sensor-proxy** runs on the Proxmox host (outside the container) +2. SSH keys are stored on the host filesystem (`/var/lib/pulse-sensor-proxy/ssh/`) 3. Pulse communicates with the proxy via unix socket 4. The proxy handles all SSH connections to cluster nodes @@ -202,7 +202,7 @@ You can still manage the entry manually if you prefer, but no extra steps are re ### Secure Proxy Architecture (Current) -As of v4.24.0, containerized deployments use **pulse-temp-proxy** which eliminates the security concerns: +As of v4.24.0, containerized deployments use **pulse-sensor-proxy** which eliminates the security concerns: - **SSH keys stored on host** - Not accessible from container - **Unix socket communication** - Pulse never touches SSH keys @@ -294,13 +294,13 @@ To check if your deployment is using the secure proxy: ```bash # On Proxmox host - check proxy service -systemctl status pulse-temp-proxy +systemctl status pulse-sensor-proxy # Check if socket exists -ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock +ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock # View proxy logs -journalctl -u pulse-temp-proxy -f +journalctl -u pulse-sensor-proxy -f ``` In the Pulse container, check the logs at startup: @@ -327,27 +327,27 @@ Temperature data will stop appearing in the dashboard after the next polling cyc ### Managing the Proxy Service -The pulse-temp-proxy service runs on the Proxmox host (outside the container). 
+The pulse-sensor-proxy service runs on the Proxmox host (outside the container). **Service Management:** ```bash # Check service status -systemctl status pulse-temp-proxy +systemctl status pulse-sensor-proxy # Restart the proxy -systemctl restart pulse-temp-proxy +systemctl restart pulse-sensor-proxy # Stop the proxy (disables temperature monitoring) -systemctl stop pulse-temp-proxy +systemctl stop pulse-sensor-proxy # Start the proxy -systemctl start pulse-temp-proxy +systemctl start pulse-sensor-proxy # Enable proxy to start on boot -systemctl enable pulse-temp-proxy +systemctl enable pulse-sensor-proxy # Disable proxy autostart -systemctl disable pulse-temp-proxy +systemctl disable pulse-sensor-proxy ``` ### Log Locations @@ -355,16 +355,16 @@ systemctl disable pulse-temp-proxy **Proxy Logs (on Proxmox host):** ```bash # Follow proxy logs in real-time -journalctl -u pulse-temp-proxy -f +journalctl -u pulse-sensor-proxy -f # View last 50 lines -journalctl -u pulse-temp-proxy -n 50 +journalctl -u pulse-sensor-proxy -n 50 # View logs since last boot -journalctl -u pulse-temp-proxy -b +journalctl -u pulse-sensor-proxy -b # View logs with timestamps -journalctl -u pulse-temp-proxy --since "1 hour ago" +journalctl -u pulse-sensor-proxy --since "1 hour ago" ``` **Pulse Logs (in container):** @@ -381,12 +381,12 @@ Rotate SSH keys periodically for security (recommended every 90 days): ```bash # 1. On Proxmox host, backup old keys -cd /var/lib/pulse-temp-proxy/ssh/ +cd /var/lib/pulse-sensor-proxy/ssh/ cp id_ed25519 id_ed25519.backup cp id_ed25519.pub id_ed25519.pub.backup # 2. Generate new keypair -ssh-keygen -t ed25519 -f id_ed25519 -N "" -C "pulse-temp-proxy-rotated" +ssh-keygen -t ed25519 -f id_ed25519 -N "" -C "pulse-sensor-proxy-rotated" # 3. Get the new public key cat id_ed25519.pub @@ -398,12 +398,12 @@ ssh root@node2 "echo 'NEW_PUBLIC_KEY_HERE' >> /root/.ssh/authorized_keys" # ... repeat for all nodes # 5. 
Restart proxy to use new keys -systemctl restart pulse-temp-proxy +systemctl restart pulse-sensor-proxy # 6. Verify temperature data still works in Pulse UI # 7. Remove old keys from nodes (after confirming new keys work) -ssh root@node1 "sed -i '/pulse-temp-proxy-old/d' /root/.ssh/authorized_keys" +ssh root@node1 "sed -i '/pulse-sensor-proxy-old/d' /root/.ssh/authorized_keys" ``` ### Revoking Access When Nodes Leave @@ -412,7 +412,7 @@ When removing a node from your cluster: ```bash # On the node being removed, remove the proxy's public key -ssh root@old-node "sed -i '/pulse-temp-proxy/d' /root/.ssh/authorized_keys" +ssh root@old-node "sed -i '/pulse-sensor-proxy/d' /root/.ssh/authorized_keys" # No restart needed - proxy will fail gracefully for that node # Temperature monitoring will continue for remaining nodes @@ -422,14 +422,14 @@ ssh root@old-node "sed -i '/pulse-temp-proxy/d' /root/.ssh/authorized_keys" **Proxy Not Running:** - Symptom: No temperature data in Pulse UI -- Check: `systemctl status pulse-temp-proxy` on Proxmox host -- Fix: `systemctl start pulse-temp-proxy` +- Check: `systemctl status pulse-sensor-proxy` on Proxmox host +- Fix: `systemctl start pulse-sensor-proxy` **Socket Not Accessible in Container:** - Symptom: Pulse logs show "Temperature proxy not available - using direct SSH" -- Check: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock` in container +- Check: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock` in container - Fix: Verify bind mount in LXC config (`/etc/pve/lxc/.conf`) -- Should have: `lxc.mount.entry: /run/pulse-temp-proxy run/pulse-temp-proxy none bind,create=dir 0 0` +- Should have: `lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0` **pvecm Not Available:** - Symptom: Proxy fails to discover cluster nodes @@ -455,13 +455,13 @@ The proxy service includes systemd restart-on-failure, which handles most issues ```bash # Check proxy health -systemctl is-active pulse-temp-proxy && 
echo "Proxy is running" || echo "Proxy is down" +systemctl is-active pulse-sensor-proxy && echo "Proxy is running" || echo "Proxy is down" # Monitor logs for errors -journalctl -u pulse-temp-proxy --since "1 hour ago" | grep -i error +journalctl -u pulse-sensor-proxy --since "1 hour ago" | grep -i error # Verify socket exists and is accessible -test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo "Socket missing" +test -S /run/pulse-sensor-proxy/pulse-sensor-proxy.sock && echo "Socket OK" || echo "Socket missing" ``` **Alerting:** @@ -474,7 +474,7 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo ### Known Limitations **One Proxy Per Host:** -- Each Proxmox host runs one pulse-temp-proxy instance +- Each Proxmox host runs one pulse-sensor-proxy instance - If multiple Pulse containers run on same host, they share the same proxy - All containers see the same temperature data from the same cluster @@ -496,10 +496,10 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo ### Common Issues **Temperature Data Stops Appearing:** -1. Check proxy service: `systemctl status pulse-temp-proxy` -2. Check proxy logs: `journalctl -u pulse-temp-proxy -n 50` +1. Check proxy service: `systemctl status pulse-sensor-proxy` +2. Check proxy logs: `journalctl -u pulse-sensor-proxy -n 50` 3. Test SSH manually: `ssh root@node "sensors -j"` -4. Verify socket exists: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock` +4. Verify socket exists: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock` **New Cluster Node Not Showing Temperatures:** 1. Ensure lm-sensors installed: `ssh root@new-node "sensors -j"` @@ -507,14 +507,14 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo 3. Force refresh by restarting Pulse: `pct restart ` **Permission Denied Errors:** -1. Verify socket permissions: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock` +1. 
Verify socket permissions: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock` 2. Should be: `srw-rw---- 1 root root` 3. Check Pulse runs as root in container: `pct exec -- whoami` **Proxy Service Won't Start:** -1. Check logs: `journalctl -u pulse-temp-proxy -n 50` -2. Verify binary exists: `ls -l /usr/local/bin/pulse-temp-proxy` -3. Test manually: `/usr/local/bin/pulse-temp-proxy --version` +1. Check logs: `journalctl -u pulse-sensor-proxy -n 50` +2. Verify binary exists: `ls -l /usr/local/bin/pulse-sensor-proxy` +3. Test manually: `/usr/local/bin/pulse-sensor-proxy --version` 4. Check socket directory: `ls -ld /var/run` ### Getting Help @@ -524,9 +524,9 @@ If temperature monitoring isn't working: 1. **Collect diagnostic info:** ```bash # On Proxmox host - systemctl status pulse-temp-proxy - journalctl -u pulse-temp-proxy -n 100 > /tmp/proxy-logs.txt - ls -la /run/pulse-temp-proxy/pulse-temp-proxy.sock + systemctl status pulse-sensor-proxy + journalctl -u pulse-sensor-proxy -n 100 > /tmp/proxy-logs.txt + ls -la /run/pulse-sensor-proxy/pulse-sensor-proxy.sock # In Pulse container journalctl -u pulse -n 100 | grep -i temp > /tmp/pulse-temp-logs.txt diff --git a/go.mod b/go.mod index 98d3251d5..9884e7d75 100644 --- a/go.mod +++ b/go.mod @@ -8,18 +8,24 @@ require ( github.com/coreos/go-oidc/v3 v3.15.0 github.com/docker/docker v28.5.1+incompatible github.com/fsnotify/fsnotify v1.9.0 + github.com/google/uuid v1.6.0 github.com/gorilla/websocket v1.5.3 github.com/joho/godotenv v1.5.1 + github.com/oklog/ulid/v2 v2.1.1 + github.com/prometheus/client_golang v1.23.2 github.com/rs/zerolog v1.34.0 github.com/spf13/cobra v1.9.1 golang.org/x/crypto v0.42.0 golang.org/x/oauth2 v0.31.0 golang.org/x/term v0.35.0 + golang.org/x/time v0.13.0 gopkg.in/yaml.v3 v3.0.1 ) require ( github.com/Microsoft/go-winio v0.4.21 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/containerd/errdefs v1.0.0 // indirect 
github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect @@ -37,10 +43,13 @@ require ( github.com/moby/sys/atomicwriter v0.1.0 // indirect github.com/moby/term v0.5.2 // indirect github.com/morikuni/aec v1.0.0 // indirect - github.com/oklog/ulid/v2 v2.1.1 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/spf13/pflag v1.0.7 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect @@ -48,7 +57,8 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect go.opentelemetry.io/otel/metric v1.38.0 // indirect go.opentelemetry.io/otel/trace v1.38.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect golang.org/x/sys v0.36.0 // indirect - golang.org/x/time v0.13.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect gotest.tools/v3 v3.5.2 // indirect ) diff --git a/go.sum b/go.sum index 056def50d..4312ec428 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,12 @@ github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEK github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Microsoft/go-winio v0.4.21 h1:+6mVbXh4wPzUrl1COX9A+ZCvEpYsOBZ6/+kwDnvLyro= github.com/Microsoft/go-winio v0.4.21/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cenkalti/backoff/v5 v5.0.3 
h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= @@ -48,10 +52,14 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= @@ -69,6 +77,8 @@ github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= 
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s= github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -80,6 +90,14 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod 
h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= @@ -117,6 +135,10 @@ go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJr go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= diff --git a/internal/api/config_handlers.go b/internal/api/config_handlers.go index e5f38d794..67a96c03e 100644 --- a/internal/api/config_handlers.go +++ b/internal/api/config_handlers.go @@ -3230,7 +3230,7 @@ echo " • No port forwarding, X11, PTY, or agent forwarding allowed" echo "" echo "For containerized Pulse (LXC/Docker):" echo " • SSH keys stored on Proxmox host (not inside container)" -echo " • pulse-temp-proxy service manages connections securely" +echo " • pulse-sensor-proxy service manages connections securely" echo " • Container compromise does not expose SSH credentials" echo "" echo "For native Pulse installations:" diff --git a/internal/tempproxy/client.go b/internal/tempproxy/client.go index 1ffa08198..75db56449 100644 --- a/internal/tempproxy/client.go +++ b/internal/tempproxy/client.go @@ -11,11 +11,11 @@ import ( ) const ( - defaultSocketPath = "/run/pulse-temp-proxy/pulse-temp-proxy.sock" + defaultSocketPath = 
"/run/pulse-sensor-proxy/pulse-sensor-proxy.sock" defaultTimeout = 10 * time.Second ) -// Client communicates with pulse-temp-proxy via unix socket +// Client communicates with pulse-sensor-proxy via unix socket type Client struct { socketPath string timeout time.Duration @@ -23,7 +23,7 @@ type Client struct { // NewClient creates a new proxy client func NewClient() *Client { - socketPath := os.Getenv("PULSE_TEMP_PROXY_SOCKET") + socketPath := os.Getenv("PULSE_SENSOR_PROXY_SOCKET") if socketPath == "" { socketPath = defaultSocketPath } diff --git a/scripts/build-release.sh b/scripts/build-release.sh index db4f52b8c..0a87aa912 100755 --- a/scripts/build-release.sh +++ b/scripts/build-release.sh @@ -67,8 +67,8 @@ for build_name in "${!builds[@]}"; do env $build_env go build \ -ldflags="-s -w -X main.Version=v${VERSION} -X main.BuildTime=${build_time} -X main.GitCommit=${git_commit}" \ -trimpath \ - -o "$BUILD_DIR/pulse-temp-proxy-$build_name" \ - ./cmd/pulse-temp-proxy + -o "$BUILD_DIR/pulse-sensor-proxy-$build_name" \ + ./cmd/pulse-sensor-proxy # Create release archive with proper structure tar_name="pulse-v${VERSION}-${build_name}.tar.gz" @@ -82,7 +82,7 @@ for build_name in "${!builds[@]}"; do # Copy binaries and VERSION file cp "$BUILD_DIR/pulse-$build_name" "$staging_dir/bin/pulse" cp "$BUILD_DIR/pulse-docker-agent-$build_name" "$staging_dir/bin/pulse-docker-agent" - cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$staging_dir/bin/pulse-temp-proxy" + cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$staging_dir/bin/pulse-sensor-proxy" cp "scripts/install-docker-agent.sh" "$staging_dir/scripts/install-docker-agent.sh" chmod 755 "$staging_dir/scripts/install-docker-agent.sh" echo "$VERSION" > "$staging_dir/VERSION" @@ -109,7 +109,7 @@ mkdir -p "$universal_dir/scripts" for build_name in "${!builds[@]}"; do cp "$BUILD_DIR/pulse-$build_name" "$universal_dir/bin/pulse-${build_name}" cp "$BUILD_DIR/pulse-docker-agent-$build_name" 
"$universal_dir/bin/pulse-docker-agent-${build_name}" - cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$universal_dir/bin/pulse-temp-proxy-${build_name}" + cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$universal_dir/bin/pulse-sensor-proxy-${build_name}" done cp "scripts/install-docker-agent.sh" "$universal_dir/scripts/install-docker-agent.sh" @@ -162,20 +162,20 @@ esac EOF chmod +x "$universal_dir/bin/pulse-docker-agent" -cat > "$universal_dir/bin/pulse-temp-proxy" << 'EOF' +cat > "$universal_dir/bin/pulse-sensor-proxy" << 'EOF' #!/bin/sh -# Auto-detect architecture and run appropriate pulse-temp-proxy binary +# Auto-detect architecture and run appropriate pulse-sensor-proxy binary ARCH=$(uname -m) case "$ARCH" in x86_64|amd64) - exec "$(dirname "$0")/pulse-temp-proxy-linux-amd64" "$@" + exec "$(dirname "$0")/pulse-sensor-proxy-linux-amd64" "$@" ;; aarch64|arm64) - exec "$(dirname "$0")/pulse-temp-proxy-linux-arm64" "$@" + exec "$(dirname "$0")/pulse-sensor-proxy-linux-arm64" "$@" ;; armv7l|armhf) - exec "$(dirname "$0")/pulse-temp-proxy-linux-armv7" "$@" + exec "$(dirname "$0")/pulse-sensor-proxy-linux-armv7" "$@" ;; *) echo "Unsupported architecture: $ARCH" >&2 @@ -183,7 +183,7 @@ case "$ARCH" in ;; esac EOF -chmod +x "$universal_dir/bin/pulse-temp-proxy" +chmod +x "$universal_dir/bin/pulse-sensor-proxy" # Add VERSION file echo "$VERSION" > "$universal_dir/VERSION" @@ -196,16 +196,16 @@ cd ../.. # Cleanup rm -rf "$universal_dir" -# Copy standalone pulse-temp-proxy binaries to release directory +# Copy standalone pulse-sensor-proxy binaries to release directory # These are needed by install-temp-proxy.sh installer script -echo "Copying standalone pulse-temp-proxy binaries..." +echo "Copying standalone pulse-sensor-proxy binaries..." 
for build_name in "${!builds[@]}"; do - cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$RELEASE_DIR/" + cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$RELEASE_DIR/" done # Generate checksums (include tarballs and standalone binaries) cd $RELEASE_DIR -sha256sum *.tar.gz pulse-temp-proxy-* > checksums.txt +sha256sum *.tar.gz pulse-sensor-proxy-* > checksums.txt cd .. echo diff --git a/scripts/install-temp-proxy.sh b/scripts/install-temp-proxy.sh index 017c9dfdb..3ace69f17 100755 --- a/scripts/install-temp-proxy.sh +++ b/scripts/install-temp-proxy.sh @@ -1,6 +1,6 @@ #!/bin/bash -# install-temp-proxy.sh - Installs pulse-temp-proxy on Proxmox host for secure temperature monitoring +# install-temp-proxy.sh - Installs pulse-sensor-proxy on Proxmox host for secure temperature monitoring # This script is idempotent and can be safely re-run set -euo pipefail @@ -67,13 +67,22 @@ if ! pct status "$CTID" >/dev/null 2>&1; then exit 1 fi -print_info "Installing pulse-temp-proxy for container $CTID" +print_info "Installing pulse-sensor-proxy for container $CTID" -BINARY_PATH="/usr/local/bin/pulse-temp-proxy" -SERVICE_PATH="/etc/systemd/system/pulse-temp-proxy.service" -RUNTIME_DIR="/run/pulse-temp-proxy" -SOCKET_PATH="/run/pulse-temp-proxy/pulse-temp-proxy.sock" -SSH_DIR="/var/lib/pulse-temp-proxy/ssh" +BINARY_PATH="/usr/local/bin/pulse-sensor-proxy" +SERVICE_PATH="/etc/systemd/system/pulse-sensor-proxy.service" +RUNTIME_DIR="/run/pulse-sensor-proxy" +SOCKET_PATH="/run/pulse-sensor-proxy/pulse-sensor-proxy.sock" +SSH_DIR="/var/lib/pulse-sensor-proxy/ssh" + +# Create dedicated service account if it doesn't exist +if ! id -u pulse-sensor-proxy >/dev/null 2>&1; then + print_info "Creating pulse-sensor-proxy service account..." 
+ useradd --system --user-group --no-create-home --shell /usr/sbin/nologin pulse-sensor-proxy + print_info "Service account created" +else + print_info "Service account pulse-sensor-proxy already exists" +fi # Install binary - either from local file or download from GitHub if [[ -n "$LOCAL_BINARY" ]]; then @@ -105,13 +114,13 @@ else ARCH=$(uname -m) case $ARCH in x86_64) - BINARY_NAME="pulse-temp-proxy-linux-amd64" + BINARY_NAME="pulse-sensor-proxy-linux-amd64" ;; aarch64|arm64) - BINARY_NAME="pulse-temp-proxy-linux-arm64" + BINARY_NAME="pulse-sensor-proxy-linux-arm64" ;; armv7l|armhf) - BINARY_NAME="pulse-temp-proxy-linux-armv7" + BINARY_NAME="pulse-sensor-proxy-linux-armv7" ;; *) print_error "Unsupported architecture: $ARCH" @@ -134,12 +143,19 @@ else print_info "Binary installed to $BINARY_PATH" fi -# Create SSH key directory -mkdir -p "$SSH_DIR" -chmod 700 "$SSH_DIR" +# Create directories with proper ownership (handles fresh installs and upgrades) +print_info "Setting up directories with proper ownership..." +install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0750 /var/lib/pulse-sensor-proxy +install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0700 "$SSH_DIR" -# Install systemd service -print_info "Installing systemd service..." +# Stop existing service if running (for upgrades) +if systemctl is-active --quiet pulse-sensor-proxy 2>/dev/null; then + print_info "Stopping existing service for upgrade..." + systemctl stop pulse-sensor-proxy +fi + +# Install hardened systemd service +print_info "Installing hardened systemd service..." 
cat > "$SERVICE_PATH" << 'EOF' [Unit] Description=Pulse Temperature Proxy @@ -148,26 +164,47 @@ After=network.target [Service] Type=simple -User=root -ExecStart=/usr/local/bin/pulse-temp-proxy +User=pulse-sensor-proxy +Group=pulse-sensor-proxy +WorkingDirectory=/var/lib/pulse-sensor-proxy +ExecStart=/usr/local/bin/pulse-sensor-proxy Restart=on-failure RestartSec=5s -# Runtime directory for socket -RuntimeDirectory=pulse-temp-proxy +# Runtime dirs/sockets +RuntimeDirectory=pulse-sensor-proxy RuntimeDirectoryMode=0775 +UMask=0007 -# Security hardening +# Core hardening NoNewPrivileges=true -PrivateTmp=true ProtectSystem=strict -ProtectHome=true -ReadWritePaths=/var/lib/pulse-temp-proxy +ProtectHome=read-only +ReadWritePaths=/var/lib/pulse-sensor-proxy +ProtectKernelTunables=true +ProtectKernelModules=true +ProtectControlGroups=true +ProtectClock=true +PrivateTmp=true +PrivateDevices=true +ProtectProc=invisible +ProcSubset=pid +LockPersonality=true +RemoveIPC=true +RestrictSUIDSGID=true +RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 +RestrictNamespaces=true +SystemCallFilter=@system-service +SystemCallErrorNumber=EPERM +CapabilityBoundingSet= +AmbientCapabilities= +KeyringMode=private +LimitNOFILE=1024 # Logging StandardOutput=journal StandardError=journal -SyslogIdentifier=pulse-temp-proxy +SyslogIdentifier=pulse-sensor-proxy [Install] WantedBy=multi-user.target @@ -176,8 +213,8 @@ EOF # Reload systemd and start service print_info "Enabling and starting service..." systemctl daemon-reload -systemctl enable pulse-temp-proxy.service -systemctl restart pulse-temp-proxy.service +systemctl enable pulse-sensor-proxy.service +systemctl restart pulse-sensor-proxy.service # Wait for socket to appear print_info "Waiting for socket..." @@ -190,7 +227,7 @@ done if [[ ! 
-S "$SOCKET_PATH" ]]; then print_error "Socket did not appear after 10 seconds" - print_info "Check service status: systemctl status pulse-temp-proxy" + print_info "Check service status: systemctl status pulse-sensor-proxy" exit 1 fi @@ -198,15 +235,15 @@ print_info "Socket ready at $SOCKET_PATH" # Configure LXC bind mount - mount entire directory for socket stability LXC_CONFIG="/etc/pve/lxc/${CTID}.conf" -BIND_ENTRY="lxc.mount.entry: /run/pulse-temp-proxy run/pulse-temp-proxy none bind,create=dir 0 0" +BIND_ENTRY="lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0" # Check if bind mount already exists -if grep -q "pulse-temp-proxy" "$LXC_CONFIG"; then +if grep -q "pulse-sensor-proxy" "$LXC_CONFIG"; then print_info "Bind mount already configured in LXC config" # Remove old socket-level bind if it exists - if grep -q "pulse-temp-proxy.sock" "$LXC_CONFIG"; then + if grep -q "pulse-sensor-proxy.sock" "$LXC_CONFIG"; then print_info "Upgrading from socket-level to directory-level bind mount..." - sed -i '/pulse-temp-proxy\.sock/d' "$LXC_CONFIG" + sed -i '/pulse-sensor-proxy\.sock/d' "$LXC_CONFIG" echo "$BIND_ENTRY" >> "$LXC_CONFIG" NEEDS_RESTART=true fi @@ -227,7 +264,7 @@ fi # Verify socket is accessible in container print_info "Verifying socket accessibility..." -if pct exec "$CTID" -- test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock; then +if pct exec "$CTID" -- test -S /run/pulse-sensor-proxy/pulse-sensor-proxy.sock; then print_info "Socket is accessible in container" else print_warn "Socket is not yet accessible in container" @@ -236,11 +273,11 @@ fi # Test proxy status print_info "Testing proxy status..." 
-if systemctl is-active --quiet pulse-temp-proxy; then - print_info "${GREEN}✓${NC} pulse-temp-proxy is running" +if systemctl is-active --quiet pulse-sensor-proxy; then + print_info "${GREEN}✓${NC} pulse-sensor-proxy is running" else - print_error "pulse-temp-proxy is not running" - print_info "Check logs: journalctl -u pulse-temp-proxy -n 50" + print_error "pulse-sensor-proxy is not running" + print_info "Check logs: journalctl -u pulse-sensor-proxy -n 50" exit 1 fi @@ -255,7 +292,7 @@ print_info " 2. Go to Settings → Enable Temperature Monitoring" print_info " 3. The proxy will automatically discover and configure cluster nodes" print_info "" print_info "To check proxy status:" -print_info " systemctl status pulse-temp-proxy" -print_info " journalctl -u pulse-temp-proxy -f" +print_info " systemctl status pulse-sensor-proxy" +print_info " journalctl -u pulse-sensor-proxy -f" exit 0 diff --git a/scripts/pulse-proxy-rotate-keys.sh b/scripts/pulse-proxy-rotate-keys.sh new file mode 100755 index 000000000..c448cf336 --- /dev/null +++ b/scripts/pulse-proxy-rotate-keys.sh @@ -0,0 +1,314 @@ +#!/usr/bin/env bash +# pulse-proxy-rotate-keys.sh +# Rotate pulse-sensor-proxy SSH keys with staging, verification, and rollback support. + +set -euo pipefail + +BASE_DIR="/var/lib/pulse-sensor-proxy" +ACTIVE_DIR="${BASE_DIR}/ssh" +POOL_DIR="${BASE_DIR}/ssh.d" +STAGING_DIR="${POOL_DIR}/next" +BACKUP_DIR="${POOL_DIR}/prev" +SOCKET_PATH="/run/pulse-sensor-proxy/pulse-sensor-proxy.sock" +SCRIPT_TAG="pulse-proxy-rotate" +SSH_KEY_TYPE="ed25519" +SSH_KEY_COMMENT="pulse-sensor-proxy" +SSH_KEY_FILE="id_${SSH_KEY_TYPE}" + +dry_run=false +do_rollback=false + +usage() { + cat <<'EOF' +Usage: pulse-proxy-rotate-keys.sh [--dry-run] [--rollback] + +Options: + --dry-run Walk through all steps without modifying state or contacting nodes. + --rollback Restore the previously active keypair (requires ssh.d/prev). + -h, --help Show this help. 
+ +Examples: + ./pulse-proxy-rotate-keys.sh --dry-run + ./pulse-proxy-rotate-keys.sh + ./pulse-proxy-rotate-keys.sh --rollback +EOF +} + +log_info() { logger -t "${SCRIPT_TAG}" "INFO: $*"; printf '[INFO] %s\n' "$*"; } +log_warn() { logger -t "${SCRIPT_TAG}" "WARN: $*"; printf '[WARN] %s\n' "$*"; } +log_error() { logger -t "${SCRIPT_TAG}" "ERROR: $*"; printf '[ERROR] %s\n' "$*" >&2; } + +require_root() { + if (( EUID != 0 )); then + log_error "This script must be run as root." + exit 1 + fi +} + +require_cmds() { + local missing=() + for cmd in ssh-keygen ssh jq socat python3 stat mkdir; do + if ! command -v "$cmd" >/dev/null 2>&1; then + missing+=("$cmd") + fi + done + if ((${#missing[@]} > 0)); then + log_error "Missing required commands: ${missing[*]}" + exit 1 + fi +} + +parse_args() { + while (($#)); do + case "$1" in + --dry-run) dry_run=true ;; + --rollback) do_rollback=true ;; + -h|--help) usage; exit 0 ;; + *) log_error "Unknown option: $1"; usage; exit 1 ;; + esac + shift + done + if $dry_run && $do_rollback; then + log_error "Cannot combine --dry-run and --rollback." + exit 1 + fi +} + +ensure_socket() { + if [[ ! -S "$SOCKET_PATH" ]]; then + log_error "Proxy socket not found at $SOCKET_PATH. Is pulse-sensor-proxy running?" 
+ exit 1 + fi +} + +run_cmd() { + if $dry_run; then + log_info "[dry-run] $*" + else + "$@" + fi +} + +json_rpc() { + local method=$1 + local params_json=${2:-"{}"} + local response + if $dry_run; then + log_info "[dry-run] would call RPC ${method} with params ${params_json}" >&2 + printf '{"success":true,"data":{}}' + return 0 + fi + + response=$(SOCKET="$SOCKET_PATH" METHOD="$method" PARAMS="$params_json" python3 - <<'PY' +import json +import os +import socket +import sys +import uuid + +sock_path = os.environ["SOCKET"] +method = os.environ["METHOD"] +params = json.loads(os.environ["PARAMS"]) if os.environ["PARAMS"] else {} +payload = { + "correlation_id": str(uuid.uuid4()), + "method": method, + "params": params, +} + +data = (json.dumps(payload) + "\n").encode() +with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock: + sock.connect(sock_path) + sock.sendall(data) + sock.shutdown(socket.SHUT_WR) + chunks = [] + while True: + chunk = sock.recv(65536) + if not chunk: + break + chunks.append(chunk) + sys.stdout.write(b"".join(chunks).decode()) +PY +) || { + log_error "RPC '${method}' failed to execute." 
+ exit 1 + } + echo "$response" +} + +require_success() { + local resp=$1 + local method=$2 + local ok + ok=$(echo "$resp" | jq -r '.success // false') + if [[ "$ok" != "true" ]]; then + local err + err=$(echo "$resp" | jq -r '.error // empty') + log_error "RPC '${method}' failed: ${err:-unknown error}" + exit 1 + fi +} + +prepare_dirs() { + for dir in "$BASE_DIR" "$POOL_DIR" "$STAGING_DIR"; do + if $dry_run; then + log_info "[dry-run] ensure directory $dir owned by pulse-sensor-proxy:pulse-sensor-proxy" + continue + fi + mkdir -p "$dir" + chown pulse-sensor-proxy:pulse-sensor-proxy "$dir" + chmod 0750 "$dir" + done +} + +clean_staging() { + if [[ -d "$STAGING_DIR" ]]; then + if $dry_run; then + log_info "[dry-run] would remove existing staging directory $STAGING_DIR" + else + rm -rf "$STAGING_DIR" + mkdir -p "$STAGING_DIR" + chown pulse-sensor-proxy:pulse-sensor-proxy "$STAGING_DIR" + chmod 0750 "$STAGING_DIR" + fi + fi +} + +generate_keypair() { + local key_path="$STAGING_DIR/${SSH_KEY_FILE}" + if $dry_run; then + log_info "[dry-run] would generate new ${SSH_KEY_TYPE} keypair at $key_path" + return + fi + clean_staging + log_info "Generating new ${SSH_KEY_TYPE} keypair in staging..." + ssh-keygen -t "$SSH_KEY_TYPE" -N '' -C "$SSH_KEY_COMMENT rotation $(date -u +%Y%m%dT%H%M%SZ)" -f "$key_path" >/dev/null + chown pulse-sensor-proxy:pulse-sensor-proxy "$key_path" "${key_path}.pub" + chmod 0600 "$key_path" + chmod 0640 "${key_path}.pub" +} + +ensure_cluster_keys() { + local key_dir=$1 + local payload + payload=$(jq -cn --arg dir "$key_dir" '{key_dir: $dir}') + local resp + resp=$(json_rpc "ensure_cluster_keys" "$payload") + require_success "$resp" "ensure_cluster_keys" + log_info "Proxy reported successful key distribution." 
+} + +list_nodes() { + local resp + resp=$(json_rpc "register_nodes") + require_success "$resp" "register_nodes" + echo "$resp" | jq -r '.data.nodes[]?.name // empty' | sort -u +} + +verify_nodes() { + local key_file="$1" + local -a bad_nodes=() + local rc + while read -r node; do + [[ -z "$node" ]] && continue + log_info "Verifying SSH access on ${node}..." + if $dry_run; then + log_info "[dry-run] would run ssh -i $key_file root@${node} sensors -j" + continue + fi + if ssh -i "$key_file" -o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@${node}" "sensors -j" >/dev/null 2>&1; then + log_info "Verification succeeded for ${node}." + else + log_warn "Verification failed for ${node}." + bad_nodes+=("$node") + fi + done < <(list_nodes) + + if ((${#bad_nodes[@]} > 0)); then + log_error "Verification failed for: ${bad_nodes[*]}" + exit 1 + fi +} + +swap_keys() { + local timestamp + timestamp=$(date -u +%Y%m%dT%H%M%SZ) + + if $dry_run; then + log_info "[dry-run] would rotate directories:" + log_info "[dry-run] mv ${BACKUP_DIR} ${POOL_DIR}/prev.${timestamp} (if exists)" + log_info "[dry-run] mv ${ACTIVE_DIR} ${BACKUP_DIR}" + log_info "[dry-run] mv ${STAGING_DIR} ${ACTIVE_DIR}" + return + fi + + log_info "Activating new keypair..." + if [[ -d "$BACKUP_DIR" ]]; then + mv "$BACKUP_DIR" "${POOL_DIR}/prev.${timestamp}" + fi + mv "$ACTIVE_DIR" "$BACKUP_DIR" + mv "$STAGING_DIR" "$ACTIVE_DIR" + chown -R pulse-sensor-proxy:pulse-sensor-proxy "$ACTIVE_DIR" "$BACKUP_DIR" + chmod 0750 "$ACTIVE_DIR" "$BACKUP_DIR" + chmod 0600 "$ACTIVE_DIR/${SSH_KEY_FILE}" + chmod 0640 "$ACTIVE_DIR/${SSH_KEY_FILE}.pub" + log_info "Key rotation complete. Previous keys stored at ${BACKUP_DIR}." +} + +rollback_keys() { + if [[ ! -d "$BACKUP_DIR" ]]; then + log_error "No backup directory (${BACKUP_DIR}) present. Cannot rollback." 
+ exit 1 + fi + local timestamp + timestamp=$(date -u +%Y%m%dT%H%M%SZ) + + if $dry_run; then + log_info "[dry-run] would rollback by swapping ${ACTIVE_DIR} with ${BACKUP_DIR}" + return + fi + + log_warn "Rolling back to previous keypair..." + local failed_dir="${POOL_DIR}/failed.${timestamp}" + if [[ -d "$ACTIVE_DIR" ]]; then + mv "$ACTIVE_DIR" "$failed_dir" + fi + mv "$BACKUP_DIR" "$ACTIVE_DIR" + chown -R pulse-sensor-proxy:pulse-sensor-proxy "$ACTIVE_DIR" + chmod 0600 "$ACTIVE_DIR/${SSH_KEY_FILE}" + chmod 0640 "$ACTIVE_DIR/${SSH_KEY_FILE}.pub" + log_info "Rollback complete. Old keys preserved at ${failed_dir}." + + log_info "Re-pushing restored keypair to cluster nodes..." + ensure_cluster_keys "$ACTIVE_DIR" +} + +main() { + parse_args "$@" + require_root + require_cmds + + if $do_rollback; then + ensure_socket + rollback_keys + return + fi + + prepare_dirs + ensure_socket + + generate_keypair + + local staging_key="${STAGING_DIR}/${SSH_KEY_FILE}" + if [[ ! -f "${staging_key}" && $dry_run == false ]]; then + log_error "Staged private key missing at ${staging_key}" + exit 1 + fi + + ensure_cluster_keys "$STAGING_DIR" + verify_nodes "$staging_key" + swap_keys + + log_info "Rotation workflow finished successfully." 
+} + +main "$@" diff --git a/scripts/pulse-sensor-proxy.service b/scripts/pulse-sensor-proxy.service new file mode 100644 index 000000000..7452614f1 --- /dev/null +++ b/scripts/pulse-sensor-proxy.service @@ -0,0 +1,51 @@ +[Unit] +Description=Pulse Sensor Proxy +Documentation=https://github.com/rcourtman/Pulse +After=network.target + +[Service] +Type=simple +User=pulse-sensor-proxy +Group=pulse-sensor-proxy +WorkingDirectory=/var/lib/pulse-sensor-proxy +ExecStart=/usr/local/bin/pulse-sensor-proxy +Restart=on-failure +RestartSec=5s + +# Runtime dirs/sockets +RuntimeDirectory=pulse-sensor-proxy +RuntimeDirectoryMode=0775 +UMask=0007 + +# Core hardening +NoNewPrivileges=true +ProtectSystem=strict +ProtectHome=read-only +ReadWritePaths=/var/lib/pulse-sensor-proxy +ProtectKernelTunables=true +ProtectKernelModules=true +ProtectControlGroups=true +ProtectClock=true +PrivateTmp=true +PrivateDevices=true +ProtectProc=invisible +ProcSubset=pid +LockPersonality=true +RemoveIPC=true +RestrictSUIDSGID=true +RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 +RestrictNamespaces=true +SystemCallFilter=@system-service +SystemCallErrorNumber=EPERM +CapabilityBoundingSet= +AmbientCapabilities= +KeyringMode=private +LimitNOFILE=1024 + +# Logging +StandardOutput=journal +StandardError=journal +SyslogIdentifier=pulse-sensor-proxy + +[Install] +WantedBy=multi-user.target diff --git a/scripts/pulse-temp-proxy.service b/scripts/pulse-temp-proxy.service deleted file mode 100644 index 9ec26443e..000000000 --- a/scripts/pulse-temp-proxy.service +++ /dev/null @@ -1,26 +0,0 @@ -[Unit] -Description=Pulse Temperature Proxy -Documentation=https://github.com/rcourtman/Pulse -After=network.target - -[Service] -Type=simple -User=root -ExecStart=/usr/local/bin/pulse-temp-proxy -Restart=on-failure -RestartSec=5s - -# Security hardening -NoNewPrivileges=true -PrivateTmp=true -ProtectSystem=strict -ProtectHome=true -ReadWritePaths=/var/lib/pulse-temp-proxy /var/run - -# Logging -StandardOutput=journal 
-StandardError=journal -SyslogIdentifier=pulse-temp-proxy - -[Install] -WantedBy=multi-user.target