refactor: Rename pulse-temp-proxy to pulse-sensor-proxy

The name "temp-proxy" implied a temporary or incomplete implementation. The new name better reflects its purpose as a secure sensor data bridge for containerized Pulse deployments.

Changes:
- Renamed cmd/pulse-temp-proxy/ to cmd/pulse-sensor-proxy/
- Updated all path constants and binary references
- Renamed environment variables: PULSE_TEMP_PROXY_* to PULSE_SENSOR_PROXY_*
- Updated systemd service and service account name
- Updated installation, rotation, and build scripts
- Renamed hardening documentation
- Maintained backward compatibility for key removal during upgrades
This commit is contained in:
rcourtman 2025-10-13 13:17:05 +00:00
parent e23a6b9631
commit b952444837
21 changed files with 3012 additions and 729 deletions

2
.gitignore vendored
View file

@ -145,4 +145,4 @@ cloud-relay/
scripts/agent/
docs/internal/
claude.md
pulse-temp-proxy
/pulse-sensor-proxy

View file

@ -8,18 +8,25 @@ import (
"github.com/rs/zerolog/log"
)
// verifyPeerCredentials checks if the connecting process is authorized
// Returns nil if authorized, error otherwise
func verifyPeerCredentials(conn net.Conn) error {
// peerCredentials holds extracted credentials from SO_PEERCRED
type peerCredentials struct {
uid uint32
pid uint32
gid uint32
}
// extractPeerCredentials extracts and verifies peer credentials
// Returns credentials if authorized, error otherwise
func extractPeerCredentials(conn net.Conn) (*peerCredentials, error) {
// Get the underlying file descriptor
unixConn, ok := conn.(*net.UnixConn)
if !ok {
return fmt.Errorf("not a unix connection")
return nil, fmt.Errorf("not a unix connection")
}
file, err := unixConn.File()
if err != nil {
return fmt.Errorf("failed to get file descriptor: %w", err)
return nil, fmt.Errorf("failed to get file descriptor: %w", err)
}
defer file.Close()
@ -28,7 +35,7 @@ func verifyPeerCredentials(conn net.Conn) error {
// Get peer credentials using SO_PEERCRED
cred, err := syscall.GetsockoptUcred(fd, syscall.SOL_SOCKET, syscall.SO_PEERCRED)
if err != nil {
return fmt.Errorf("failed to get peer credentials: %w", err)
return nil, fmt.Errorf("failed to get peer credentials: %w", err)
}
log.Debug().
@ -39,14 +46,29 @@ func verifyPeerCredentials(conn net.Conn) error {
// Allow root (UID 0) - this covers most service scenarios
if cred.Uid == 0 {
return nil
return &peerCredentials{
uid: cred.Uid,
pid: uint32(cred.Pid),
gid: cred.Gid,
}, nil
}
// Allow the proxy's own user (for testing/debugging)
if cred.Uid == uint32(syscall.Getuid()) {
return nil
return &peerCredentials{
uid: cred.Uid,
pid: uint32(cred.Pid),
gid: cred.Gid,
}, nil
}
// Reject all other users
return fmt.Errorf("unauthorized: uid=%d gid=%d", cred.Uid, cred.Gid)
return nil, fmt.Errorf("unauthorized: uid=%d gid=%d", cred.Uid, cred.Gid)
}
// verifyPeerCredentials reports whether the connecting process is authorized
// (legacy wrapper kept for callers that do not need the credential details).
// It returns nil when the peer is allowed and an error otherwise.
func verifyPeerCredentials(conn net.Conn) error {
	if _, err := extractPeerCredentials(conn); err != nil {
		return err
	}
	return nil
}

View file

@ -0,0 +1,172 @@
package main
import (
"fmt"
"net"
"os"
"strings"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)
// Config holds proxy configuration loaded from a YAML file and/or
// environment variables (see loadConfig).
type Config struct {
	// AllowedSourceSubnets lists CIDRs (bare IPs are normalized to /32 or
	// /128) embedded in the from= clause of pushed authorized_keys entries.
	AllowedSourceSubnets []string `yaml:"allowed_source_subnets"`
	// MetricsAddress is the listen address for the Prometheus endpoint;
	// "default" selects defaultMetricsAddr, ""/"disabled" turns it off.
	MetricsAddress string `yaml:"metrics_address"`
}
// loadConfig builds the proxy configuration by merging, in order:
//  1. the YAML file at configPath (if the file exists),
//  2. PULSE_SENSOR_PROXY_ALLOWED_SUBNETS (comma-separated, appended),
//  3. PULSE_SENSOR_PROXY_METRICS_ADDR (overrides the metrics address).
//
// Subnets are validated and normalized via parseAllowedSubnets; when none
// are configured at all, host addresses are auto-detected as a fallback.
// A missing config file is not an error; an unreadable or invalid one is.
func loadConfig(configPath string) (*Config, error) {
	cfg := &Config{}

	// Try to load config file if it exists (the os.Stat guard makes a
	// missing file non-fatal; read/parse failures are still reported).
	if configPath != "" {
		if _, err := os.Stat(configPath); err == nil {
			data, err := os.ReadFile(configPath)
			if err != nil {
				return nil, fmt.Errorf("failed to read config file: %w", err)
			}
			if err := yaml.Unmarshal(data, cfg); err != nil {
				return nil, fmt.Errorf("failed to parse config file: %w", err)
			}
			log.Info().
				Str("config_file", configPath).
				Int("subnet_count", len(cfg.AllowedSourceSubnets)).
				Msg("Loaded configuration from file")
		}
	}

	// Append from environment variable if set (additive, not a replacement,
	// so file- and env-provided subnets are combined).
	if envSubnets := os.Getenv("PULSE_SENSOR_PROXY_ALLOWED_SUBNETS"); envSubnets != "" {
		envList := strings.Split(envSubnets, ",")
		cfg.AllowedSourceSubnets = append(cfg.AllowedSourceSubnets, envList...)
		log.Info().
			Int("env_subnet_count", len(envList)).
			Msg("Appended subnets from environment variable")
	}

	// Metrics address from environment variable (overrides the file value)
	if envMetrics := os.Getenv("PULSE_SENSOR_PROXY_METRICS_ADDR"); envMetrics != "" {
		cfg.MetricsAddress = envMetrics
		log.Info().Str("metrics_addr", envMetrics).Msg("Metrics address set from environment")
	}

	// Default metrics address if not configured
	if cfg.MetricsAddress == "" {
		cfg.MetricsAddress = "default" // Will use defaultMetricsAddr
	}

	// Parse and validate all subnets (dedupes and normalizes bare IPs)
	if len(cfg.AllowedSourceSubnets) > 0 {
		normalized, err := parseAllowedSubnets(cfg.AllowedSourceSubnets)
		if err != nil {
			return nil, fmt.Errorf("invalid subnet configuration: %w", err)
		}
		cfg.AllowedSourceSubnets = normalized
		log.Info().
			Strs("allowed_subnets", cfg.AllowedSourceSubnets).
			Msg("Validated and normalized subnet configuration")
	} else {
		// Auto-detect if no configuration provided (best-effort fallback;
		// explicit configuration is preferred, hence the warnings).
		detected := detectHostCIDRs()
		if len(detected) == 0 {
			log.Warn().Msg("No allowed_source_subnets configured and no host addresses detected")
		} else {
			cfg.AllowedSourceSubnets = detected
			log.Warn().
				Strs("auto_detected_subnets", detected).
				Msg("No allowed_source_subnets configured; using detected host addresses (recommended to configure explicitly)")
		}
	}

	return cfg, nil
}
// detectHostCIDRs returns the machine's non-loopback, non-link-local
// addresses formatted as single-host CIDRs (/32 for IPv4, /128 for IPv6).
// Interfaces that are down are skipped; enumeration failures are logged and
// yield an empty result rather than an error.
func detectHostCIDRs() []string {
	var result []string

	interfaces, err := net.Interfaces()
	if err != nil {
		log.Warn().Err(err).Msg("Failed to enumerate network interfaces")
		return result
	}

	for _, nic := range interfaces {
		isUp := nic.Flags&net.FlagUp != 0
		isLoopback := nic.Flags&net.FlagLoopback != 0
		if !isUp || isLoopback {
			continue
		}

		addrs, addrErr := nic.Addrs()
		if addrErr != nil {
			log.Warn().Str("iface", nic.Name).Err(addrErr).Msg("Address lookup failed")
			continue
		}

		for _, a := range addrs {
			network, ok := a.(*net.IPNet)
			if !ok {
				continue
			}
			ip := network.IP
			switch {
			case ip.IsLoopback(), ip.IsLinkLocalUnicast(), ip.IsLinkLocalMulticast():
				// Not useful as a from= source restriction; skip.
			case ip.To4() != nil:
				result = append(result, ip.String()+"/32")
			case ip.To16() != nil:
				result = append(result, ip.String()+"/128")
			}
		}
	}

	return result
}
// parseAllowedSubnets validates and normalizes subnet specifications.
// Entries may be CIDRs (kept verbatim) or bare IPs (suffixed with /32 for
// IPv4 or /128 for IPv6). Blank entries are ignored, duplicates are dropped
// while preserving first-seen order, and any unparseable entry aborts the
// whole call with an error.
func parseAllowedSubnets(cfg []string) ([]string, error) {
	var out []string
	dedup := make(map[string]struct{}, len(cfg))
	add := func(v string) {
		if _, dup := dedup[v]; !dup {
			dedup[v] = struct{}{}
			out = append(out, v)
		}
	}

	for _, raw := range cfg {
		entry := strings.TrimSpace(raw)
		if entry == "" {
			// Skip blanks (e.g. trailing commas in env-provided lists).
			continue
		}

		// CIDR entries are accepted as written.
		if _, _, err := net.ParseCIDR(entry); err == nil {
			add(entry)
			continue
		}

		// Bare IPs become single-host networks.
		ip := net.ParseIP(entry)
		if ip == nil {
			return nil, fmt.Errorf("invalid subnet or address: %s", entry)
		}
		suffix := "/32"
		if ip.To4() == nil {
			suffix = "/128"
		}
		add(entry + suffix)
	}

	return out, nil
}

View file

@ -0,0 +1,732 @@
package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"
)
// Version information (set at build time with -ldflags)
var (
	Version   = "dev"     // semantic version, or "dev" for local builds
	BuildTime = "unknown" // build timestamp injected at build time
	GitCommit = "unknown" // git commit hash injected at build time
)

const (
	defaultSocketPath = "/run/pulse-sensor-proxy/pulse-sensor-proxy.sock" // RPC unix socket
	defaultSSHKeyPath = "/var/lib/pulse-sensor-proxy/ssh"                 // SSH keypair directory
	defaultConfigPath = "/etc/pulse-sensor-proxy/config.yaml"             // YAML config file
	maxRequestBytes   = 16 * 1024                                         // 16 KiB max request size
)
// rootCmd is the top-level CLI command; running the binary with no
// subcommand starts the proxy itself.
var rootCmd = &cobra.Command{
	Use:     "pulse-sensor-proxy",
	Short:   "Pulse Sensor Proxy - Secure sensor data bridge for containerized Pulse",
	Long:    `Sensor monitoring proxy that keeps SSH keys on the host and exposes sensor data via unix socket`,
	Version: Version,
	Run: func(cmd *cobra.Command, args []string) {
		runProxy()
	},
}

// versionCmd implements the "version" subcommand, printing version plus any
// build metadata that was injected at build time.
var versionCmd = &cobra.Command{
	Use:   "version",
	Short: "Print version information",
	Run: func(cmd *cobra.Command, args []string) {
		fmt.Printf("pulse-sensor-proxy %s\n", Version)
		// Build metadata lines are only shown when -ldflags provided values.
		if BuildTime != "unknown" {
			fmt.Printf("Built: %s\n", BuildTime)
		}
		if GitCommit != "unknown" {
			fmt.Printf("Commit: %s\n", GitCommit)
		}
	},
}

// init wires the subcommands into the root command.
func init() {
	rootCmd.AddCommand(versionCmd)
}
// main is the process entry point; it delegates to cobra and exits non-zero
// on any command error.
func main() {
	err := rootCmd.Execute()
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "Error: %v\n", err)
	os.Exit(1)
}
// Proxy manages the temperature monitoring proxy: it owns the unix socket,
// the SSH key material, and the per-connection throttling state.
type Proxy struct {
	socketPath  string                 // unix socket path the RPC listener binds to
	sshKeyPath  string                 // directory holding the id_ed25519 keypair
	listener    net.Listener           // active unix listener (nil until Start)
	rateLimiter *rateLimiter           // per-peer rate/concurrency limiting
	nodeGate    *nodeGate              // per-node serialization of SSH fetches
	router      map[string]handlerFunc // RPC method name -> handler
	config      *Config                // loaded configuration
	metrics     *ProxyMetrics          // Prometheus instrumentation
}
// RPC request types (values carried in RPCRequest.Method).
const (
	RPCEnsureClusterKeys = "ensure_cluster_keys" // discover nodes, push SSH keys
	RPCRegisterNodes     = "register_nodes"      // list nodes + SSH readiness
	RPCGetTemperature    = "get_temperature"     // fetch sensor data for a node
	RPCGetStatus         = "get_status"          // report version + public key
)

// RPCRequest represents a request from Pulse
type RPCRequest struct {
	CorrelationID string                 `json:"correlation_id,omitempty"` // caller-supplied ID echoed in the response
	Method        string                 `json:"method"`                   // one of the RPC* constants above
	Params        map[string]interface{} `json:"params"`                   // method-specific arguments
}

// RPCResponse represents a response to Pulse
type RPCResponse struct {
	CorrelationID string      `json:"correlation_id,omitempty"` // echoed from the request
	Success       bool        `json:"success"`                  // true when Data is valid
	Data          interface{} `json:"data,omitempty"`           // handler result on success
	Error         string      `json:"error,omitempty"`          // failure reason when Success is false
}

// handlerFunc is the signature for RPC method handlers
type handlerFunc func(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error)
// runProxy wires together configuration, metrics, the RPC router and the
// unix-socket listener, then blocks until SIGINT/SIGTERM and shuts down.
//
// Fixes over the previous version: the socket is cleaned up when the metrics
// server fails to start (log.Fatal exits without running deferred cleanup),
// and the metrics shutdown is bounded by a timeout so a stuck connection
// cannot hang process exit indefinitely.
func runProxy() {
	// Initialize logger
	zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})

	// Resolve paths from environment with defaults.
	socketPath := os.Getenv("PULSE_SENSOR_PROXY_SOCKET")
	if socketPath == "" {
		socketPath = defaultSocketPath
	}

	sshKeyPath := os.Getenv("PULSE_SENSOR_PROXY_SSH_DIR")
	if sshKeyPath == "" {
		sshKeyPath = defaultSSHKeyPath
	}

	// Load configuration
	configPath := os.Getenv("PULSE_SENSOR_PROXY_CONFIG")
	if configPath == "" {
		configPath = defaultConfigPath
	}
	cfg, err := loadConfig(configPath)
	if err != nil {
		log.Fatal().Err(err).Msg("Failed to load configuration")
	}

	// Initialize metrics
	metrics := NewProxyMetrics(Version)

	log.Info().
		Str("socket", socketPath).
		Str("ssh_key_dir", sshKeyPath).
		Str("config_path", configPath).
		Str("version", Version).
		Msg("Starting pulse-sensor-proxy")

	proxy := &Proxy{
		socketPath:  socketPath,
		sshKeyPath:  sshKeyPath,
		rateLimiter: newRateLimiter(),
		nodeGate:    newNodeGate(),
		config:      cfg,
		metrics:     metrics,
	}

	// Register RPC method handlers
	proxy.router = map[string]handlerFunc{
		RPCGetStatus:         proxy.handleGetStatusV2,
		RPCEnsureClusterKeys: proxy.handleEnsureClusterKeysV2,
		RPCRegisterNodes:     proxy.handleRegisterNodesV2,
		RPCGetTemperature:    proxy.handleGetTemperatureV2,
	}

	if err := proxy.Start(); err != nil {
		log.Fatal().Err(err).Msg("Failed to start proxy")
	}

	// Start metrics server; remove the socket before exiting on failure so a
	// subsequent start does not find a stale socket file.
	if err := metrics.Start(cfg.MetricsAddress); err != nil {
		proxy.Stop()
		log.Fatal().Err(err).Msg("Failed to start metrics server")
	}

	// Setup signal handlers and block until a shutdown signal arrives.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
	<-sigChan

	log.Info().Msg("Shutting down proxy...")
	proxy.Stop()
	proxy.rateLimiter.shutdown()
	// Bound the metrics drain so shutdown cannot hang on a stuck client.
	shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	metrics.Shutdown(shutdownCtx)
	log.Info().Msg("Proxy stopped")
}
// Start initializes and starts the proxy: it prepares the SSH key material,
// binds the unix socket, and begins accepting connections in a background
// goroutine. Order matters here: any stale socket file must be removed
// before net.Listen, and permissions are tightened only after the socket
// exists on disk.
func (p *Proxy) Start() error {
	// Create SSH key directory if it doesn't exist (0700: keys are secrets)
	if err := os.MkdirAll(p.sshKeyPath, 0700); err != nil {
		return fmt.Errorf("failed to create SSH key directory: %w", err)
	}

	// Ensure SSH keypair exists
	if err := p.ensureSSHKeypair(); err != nil {
		return fmt.Errorf("failed to ensure SSH keypair: %w", err)
	}

	// Remove existing socket if it exists (e.g. left over from an unclean
	// exit); otherwise net.Listen below fails with "address already in use".
	if err := os.RemoveAll(p.socketPath); err != nil {
		return fmt.Errorf("failed to remove existing socket: %w", err)
	}

	// Create socket directory if needed
	socketDir := filepath.Dir(p.socketPath)
	if err := os.MkdirAll(socketDir, 0755); err != nil {
		return fmt.Errorf("failed to create socket directory: %w", err)
	}

	// Create unix socket listener
	listener, err := net.Listen("unix", p.socketPath)
	if err != nil {
		return fmt.Errorf("failed to create unix socket: %w", err)
	}
	p.listener = listener

	// Set socket permissions to owner+group only
	// We use SO_PEERCRED for authentication, so we don't need world-readable
	// (a chmod failure is non-fatal: per-connection auth still applies).
	if err := os.Chmod(p.socketPath, 0660); err != nil {
		log.Warn().Err(err).Msg("Failed to set socket permissions")
	}

	log.Info().Str("socket", p.socketPath).Msg("Unix socket ready")

	// Start accepting connections
	go p.acceptConnections()

	return nil
}
// Stop closes the RPC listener (if one was started) and removes the socket
// file from disk so a later start can bind cleanly.
func (p *Proxy) Stop() {
	if p.listener == nil {
		return
	}
	p.listener.Close()
	os.Remove(p.socketPath)
}
// acceptConnections handles incoming socket connections, dispatching each to
// its own goroutine, until the listener is closed.
func (p *Proxy) acceptConnections() {
	for {
		conn, err := p.listener.Accept()
		if err != nil {
			// A closed listener is the normal shutdown path, not an error.
			// Use errors.Is with net.ErrClosed instead of the previous
			// error-string comparison, which is fragile and unsupported.
			if errors.Is(err, net.ErrClosed) {
				return
			}
			log.Error().Err(err).Msg("Failed to accept connection")
			continue
		}

		go p.handleConnection(conn)
	}
}
// handleConnection processes a single RPC request with full validation and
// throttling. Pipeline: peer-credential auth (SO_PEERCRED) -> per-peer rate
// limit -> size-limited JSON decode -> router dispatch -> response + metric
// recording. The connection carries exactly one request/response pair and
// is closed on return.
func (p *Proxy) handleConnection(conn net.Conn) {
	defer conn.Close()

	// Track concurrent requests
	p.metrics.queueDepth.Inc()
	defer p.metrics.queueDepth.Dec()

	// Start timing for latency metrics
	startTime := time.Now()

	// Overall 30s budget for handler execution; the socket read itself gets
	// a shorter 5s deadline below.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Set read deadline
	if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
		log.Warn().Err(err).Msg("Failed to set read deadline")
	}

	// Extract and verify peer credentials; unauthorized peers are rejected
	// before any request bytes are parsed.
	cred, err := extractPeerCredentials(conn)
	if err != nil {
		log.Warn().Err(err).Msg("Peer credentials unavailable")
		p.sendErrorV2(conn, "unauthorized", "")
		return
	}

	// Check rate limit and concurrency (keyed by peer uid+pid)
	releaseLimiter, ok := p.rateLimiter.allow(peerID{uid: cred.uid, pid: cred.pid})
	if !ok {
		p.metrics.rateLimitHits.Inc()
		log.Warn().
			Uint32("uid", cred.uid).
			Uint32("pid", cred.pid).
			Msg("Rate limit exceeded")
		p.sendErrorV2(conn, "rate limit exceeded", "")
		return
	}
	defer releaseLimiter()

	// Limit request size and decode
	lr := io.LimitReader(conn, maxRequestBytes)
	decoder := json.NewDecoder(lr)
	decoder.DisallowUnknownFields()

	var req RPCRequest
	if err := decoder.Decode(&req); err != nil {
		if errors.Is(err, io.EOF) || err.Error() == "EOF" {
			p.sendErrorV2(conn, "empty request", "")
			return
		}
		p.sendErrorV2(conn, "invalid request format", "")
		return
	}

	// Check if payload was too large
	// NOTE(review): decoder.More() reports trailing data after the first
	// JSON value; an over-limit payload would instead surface as a decode
	// error above. Confirm the intended semantics of this check.
	if decoder.More() {
		p.sendErrorV2(conn, "payload too large", req.CorrelationID)
		return
	}

	// Sanitize correlation ID before echoing it into logs/responses
	req.CorrelationID = sanitizeCorrelationID(req.CorrelationID)

	// Create contextual logger
	logger := log.With().
		Str("corr_id", req.CorrelationID).
		Uint32("uid", cred.uid).
		Uint32("pid", cred.pid).
		Str("method", req.Method).
		Logger()

	// Prepare response
	resp := RPCResponse{
		CorrelationID: req.CorrelationID,
		Success:       false,
	}

	// Find handler
	handler := p.router[req.Method]
	if handler == nil {
		resp.Error = "unknown method"
		logger.Warn().Msg("Unknown method")
		p.sendResponse(conn, resp)
		return
	}

	// Execute handler
	result, err := handler(ctx, &req, logger)
	if err != nil {
		resp.Error = err.Error()
		logger.Warn().Err(err).Msg("Handler failed")
		p.sendResponse(conn, resp)

		// Record failed request
		p.metrics.rpcRequests.WithLabelValues(req.Method, "error").Inc()
		p.metrics.rpcLatency.WithLabelValues(req.Method).Observe(time.Since(startTime).Seconds())
		return
	}

	// Success
	resp.Success = true
	resp.Data = result
	logger.Info().Msg("Request completed")
	p.sendResponse(conn, resp)

	// Record successful request
	p.metrics.rpcRequests.WithLabelValues(req.Method, "success").Inc()
	p.metrics.rpcLatency.WithLabelValues(req.Method).Observe(time.Since(startTime).Seconds())
}
// sendError sends an error response without a correlation ID (legacy
// helper). It delegates to sendResponse so that encode failures are logged
// consistently instead of being silently dropped as before.
func (p *Proxy) sendError(conn net.Conn, message string) {
	p.sendResponse(conn, RPCResponse{
		Success: false,
		Error:   message,
	})
}
// sendErrorV2 sends an error response carrying the request's correlation ID.
// It delegates to sendResponse so that encode failures are logged
// consistently instead of being silently dropped as before.
func (p *Proxy) sendErrorV2(conn net.Conn, message, correlationID string) {
	p.sendResponse(conn, RPCResponse{
		CorrelationID: correlationID,
		Success:       false,
		Error:         message,
	})
}
// sendResponse JSON-encodes resp onto the connection. Encode failures are
// logged but not returned — the peer may already have disconnected, and
// there is nothing further the caller could do.
func (p *Proxy) sendResponse(conn net.Conn, resp RPCResponse) {
	encoder := json.NewEncoder(conn)
	if err := encoder.Encode(resp); err != nil {
		log.Error().Err(err).Msg("Failed to encode RPC response")
	}
}
// handleGetStatus returns proxy status: the running version, the proxy's SSH
// public key, and the key directory (legacy, pre-context handler).
func (p *Proxy) handleGetStatus(req RPCRequest) RPCResponse {
	pubKey, err := os.ReadFile(filepath.Join(p.sshKeyPath, "id_ed25519.pub"))
	if err != nil {
		return RPCResponse{
			Success: false,
			Error:   fmt.Sprintf("failed to read public key: %v", err),
		}
	}

	status := map[string]interface{}{
		"version":    Version,
		"public_key": string(pubKey),
		"ssh_dir":    p.sshKeyPath,
	}
	return RPCResponse{Success: true, Data: status}
}
// ensureSSHKeypair makes sure the proxy's ed25519 keypair exists on disk.
// Three cases are handled:
//   - both files present: nothing to do;
//   - private key present but .pub missing: re-derive the public key with
//     `ssh-keygen -y` (the previous code re-ran key generation here, which
//     fails because ssh-keygen will not overwrite an existing private key
//     non-interactively);
//   - no private key: generate a fresh keypair.
func (p *Proxy) ensureSSHKeypair() error {
	privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
	pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")

	if _, err := os.Stat(privKeyPath); err == nil {
		if _, err := os.Stat(pubKeyPath); err == nil {
			log.Info().Msg("SSH keypair already exists")
			return nil
		}
		// Private key exists but the public half is missing: recover it
		// from the private key instead of regenerating the pair.
		log.Info().Msg("Recovering missing SSH public key from private key")
		cmd := fmt.Sprintf("ssh-keygen -y -f %s > %s", privKeyPath, pubKeyPath)
		if output, err := execCommand(cmd); err != nil {
			return fmt.Errorf("failed to recover SSH public key: %w (output: %s)", err, output)
		}
		log.Info().Str("path", pubKeyPath).Msg("SSH public key recovered")
		return nil
	}

	log.Info().Msg("Generating new SSH keypair")

	// Generate ed25519 keypair using ssh-keygen (empty passphrase; the key
	// directory itself is the protection boundary, created 0700 by Start)
	cmd := fmt.Sprintf("ssh-keygen -t ed25519 -f %s -N '' -C 'pulse-sensor-proxy'", privKeyPath)
	if output, err := execCommand(cmd); err != nil {
		return fmt.Errorf("failed to generate SSH keypair: %w (output: %s)", err, output)
	}

	log.Info().Str("path", privKeyPath).Msg("SSH keypair generated")
	return nil
}
// handleEnsureClusterKeys discovers cluster nodes and pushes SSH keys
// (legacy, pre-context handler; superseded by handleEnsureClusterKeysV2,
// which adds node-name validation and a key_dir override).
// Note: the call returns Success=true even when individual pushes fail;
// per-node outcomes are reported in Data["results"].
func (p *Proxy) handleEnsureClusterKeys(req RPCRequest) RPCResponse {
	// Check if we're on a Proxmox host
	if !isProxmoxHost() {
		return RPCResponse{
			Success: false,
			Error:   "not running on Proxmox host - cannot discover cluster",
		}
	}

	// Discover cluster nodes
	nodes, err := discoverClusterNodes()
	if err != nil {
		return RPCResponse{
			Success: false,
			Error:   fmt.Sprintf("failed to discover cluster: %v", err),
		}
	}

	log.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes")

	// Push SSH key to each node, collecting per-node outcomes instead of
	// failing fast so one unreachable node does not block the others.
	results := make(map[string]interface{})
	successCount := 0
	for _, node := range nodes {
		log.Info().Str("node", node).Msg("Pushing SSH key to node")
		if err := p.pushSSHKey(node); err != nil {
			log.Error().Err(err).Str("node", node).Msg("Failed to push SSH key")
			results[node] = map[string]interface{}{
				"success": false,
				"error":   err.Error(),
			}
		} else {
			log.Info().Str("node", node).Msg("SSH key pushed successfully")
			results[node] = map[string]interface{}{
				"success": true,
			}
			successCount++
		}
	}

	return RPCResponse{
		Success: true,
		Data: map[string]interface{}{
			"nodes":         nodes,
			"results":       results,
			"success_count": successCount,
			"total_count":   len(nodes),
		},
	}
}
// handleRegisterNodes returns discovered nodes together with a per-node SSH
// connectivity probe (legacy, pre-context handler; superseded by
// handleRegisterNodesV2, which adds node-name validation).
func (p *Proxy) handleRegisterNodes(req RPCRequest) RPCResponse {
	// Check if we're on a Proxmox host
	if !isProxmoxHost() {
		return RPCResponse{
			Success: false,
			Error:   "not running on Proxmox host",
		}
	}

	// Discover cluster nodes
	nodes, err := discoverClusterNodes()
	if err != nil {
		return RPCResponse{
			Success: false,
			Error:   fmt.Sprintf("failed to discover nodes: %v", err),
		}
	}

	// Test SSH connectivity to each node; a failed probe marks the node as
	// not ssh_ready rather than failing the whole call.
	nodeStatus := make([]map[string]interface{}, 0, len(nodes))
	for _, node := range nodes {
		status := map[string]interface{}{
			"name": node,
		}
		if err := p.testSSHConnection(node); err != nil {
			status["ssh_ready"] = false
			status["error"] = err.Error()
		} else {
			status["ssh_ready"] = true
		}
		nodeStatus = append(nodeStatus, status)
	}

	return RPCResponse{
		Success: true,
		Data: map[string]interface{}{
			"nodes": nodeStatus,
		},
	}
}
// handleGetTemperature fetches temperature data from a node via SSH
// (legacy, pre-context handler). Requires a string "node" parameter.
func (p *Proxy) handleGetTemperature(req RPCRequest) RPCResponse {
	fail := func(msg string) RPCResponse {
		return RPCResponse{Success: false, Error: msg}
	}

	nodeParam, ok := req.Params["node"]
	if !ok {
		return fail("missing 'node' parameter")
	}
	node, ok := nodeParam.(string)
	if !ok {
		return fail("'node' parameter must be a string")
	}

	tempData, err := p.getTemperatureViaSSH(node)
	if err != nil {
		return fail(fmt.Sprintf("failed to get temperatures: %v", err))
	}

	return RPCResponse{
		Success: true,
		Data: map[string]interface{}{
			"node":        node,
			"temperature": tempData,
		},
	}
}
// New V2 handlers with context and structured logging
// handleGetStatusV2 returns proxy status (version, SSH public key, key
// directory) with context support and structured logging.
func (p *Proxy) handleGetStatusV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
	keyFile := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
	pubKey, err := os.ReadFile(keyFile)
	if err != nil {
		return nil, fmt.Errorf("failed to read public key: %w", err)
	}

	logger.Info().Msg("Status request served")

	status := map[string]interface{}{
		"version":    Version,
		"public_key": string(pubKey),
		"ssh_dir":    p.sshKeyPath,
	}
	return status, nil
}
// handleEnsureClusterKeysV2 discovers cluster nodes and pushes SSH keys with
// validation. An optional string param "key_dir" overrides the key source
// directory (used during key rotation). Individual push failures do not fail
// the call; per-node outcomes are returned in "results" together with
// success/total counts.
func (p *Proxy) handleEnsureClusterKeysV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
	// Check if we're on a Proxmox host
	if !isProxmoxHost() {
		return nil, fmt.Errorf("not running on Proxmox host - cannot discover cluster")
	}

	// Check for optional key_dir parameter (for key rotation)
	keyDir := p.sshKeyPath // default
	if keyDirParam, ok := req.Params["key_dir"]; ok {
		if keyDirStr, ok := keyDirParam.(string); ok && keyDirStr != "" {
			keyDir = keyDirStr
			logger.Info().Str("key_dir", keyDir).Msg("Using custom key directory for rotation")
		}
	}

	// Discover cluster nodes
	nodes, err := discoverClusterNodes()
	if err != nil {
		return nil, fmt.Errorf("failed to discover cluster: %w", err)
	}

	logger.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes")

	// Push SSH key to each node
	results := make(map[string]interface{})
	successCount := 0
	for _, node := range nodes {
		// Validate node name before it is used on an SSH command line
		if err := validateNodeName(node); err != nil {
			logger.Warn().Str("node", node).Msg("Invalid node name format")
			results[node] = map[string]interface{}{
				"success": false,
				"error":   "invalid node name",
			}
			continue
		}

		logger.Info().Str("node", node).Str("key_dir", keyDir).Msg("Pushing SSH key to node")
		if err := p.pushSSHKeyFrom(node, keyDir); err != nil {
			logger.Error().Err(err).Str("node", node).Msg("Failed to push SSH key")
			results[node] = map[string]interface{}{
				"success": false,
				"error":   err.Error(),
			}
		} else {
			logger.Info().Str("node", node).Msg("SSH key pushed successfully")
			results[node] = map[string]interface{}{
				"success": true,
			}
			successCount++
		}
	}

	return map[string]interface{}{
		"nodes":         nodes,
		"results":       results,
		"success_count": successCount,
		"total_count":   len(nodes),
	}, nil
}
// handleRegisterNodesV2 returns the discovered cluster nodes together with a
// per-node SSH readiness probe. Nodes failing name validation are reported
// as not ssh_ready rather than aborting the whole call.
func (p *Proxy) handleRegisterNodesV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
	// Check if we're on a Proxmox host
	if !isProxmoxHost() {
		return nil, fmt.Errorf("not running on Proxmox host")
	}

	// Discover cluster nodes
	nodes, err := discoverClusterNodes()
	if err != nil {
		return nil, fmt.Errorf("failed to discover nodes: %w", err)
	}

	// Test SSH connectivity to each node
	nodeStatus := make([]map[string]interface{}, 0, len(nodes))
	for _, node := range nodes {
		status := map[string]interface{}{
			"name": node,
		}

		// Validate node name before it reaches an SSH command line
		if err := validateNodeName(node); err != nil {
			status["ssh_ready"] = false
			status["error"] = "invalid node name"
			nodeStatus = append(nodeStatus, status)
			continue
		}

		if err := p.testSSHConnection(node); err != nil {
			status["ssh_ready"] = false
			status["error"] = err.Error()
		} else {
			status["ssh_ready"] = true
		}
		nodeStatus = append(nodeStatus, status)
	}

	logger.Info().Int("node_count", len(nodeStatus)).Msg("Node discovery completed")

	return map[string]interface{}{
		"nodes": nodeStatus,
	}, nil
}
// handleGetTemperatureV2 fetches temperature data for one node with
// validation and per-node concurrency control. Requires a string "node"
// param; the name is trimmed and validated before use (it is later
// interpolated into an SSH command line, so validation is security-relevant).
func (p *Proxy) handleGetTemperatureV2(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
	// Extract node parameter
	nodeParam, ok := req.Params["node"]
	if !ok {
		return nil, fmt.Errorf("missing 'node' parameter")
	}
	node, ok := nodeParam.(string)
	if !ok {
		return nil, fmt.Errorf("'node' parameter must be a string")
	}

	// Trim and validate node name; the generic error deliberately avoids
	// echoing untrusted input back to the caller.
	node = strings.TrimSpace(node)
	if err := validateNodeName(node); err != nil {
		logger.Warn().Str("node", node).Msg("Invalid node name format")
		return nil, fmt.Errorf("invalid node name")
	}

	// Acquire per-node concurrency lock (prevents multiple simultaneous requests to same node)
	releaseNode := p.nodeGate.acquire(node)
	defer releaseNode()

	logger.Debug().Str("node", node).Msg("Fetching temperature via SSH")

	// Fetch temperature data
	tempData, err := p.getTemperatureViaSSH(node)
	if err != nil {
		logger.Warn().Err(err).Str("node", node).Msg("Failed to get temperatures")
		return nil, fmt.Errorf("failed to get temperatures: %w", err)
	}

	logger.Info().Str("node", node).Msg("Temperature data fetched successfully")

	return map[string]interface{}{
		"node":        node,
		"temperature": tempData,
	}, nil
}

View file

@ -0,0 +1,167 @@
package main
import (
"context"
"net"
"net/http"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/rs/zerolog/log"
)
// defaultMetricsAddr is the loopback-only listen address used when the
// configured metrics address is "default".
const defaultMetricsAddr = "127.0.0.1:9127"

// ProxyMetrics holds Prometheus metrics for the proxy, plus the HTTP server
// and private registry that expose them.
type ProxyMetrics struct {
	rpcRequests   *prometheus.CounterVec   // RPC count by method/result
	rpcLatency    *prometheus.HistogramVec // RPC handler latency by method
	sshRequests   *prometheus.CounterVec   // SSH executions by node/result
	sshLatency    *prometheus.HistogramVec // SSH latency by node
	queueDepth    prometheus.Gauge         // in-flight RPC requests
	rateLimitHits prometheus.Counter       // requests rejected by the limiter
	buildInfo     *prometheus.GaugeVec     // constant 1, labeled by version
	server        *http.Server             // /metrics server (nil when disabled)
	registry      *prometheus.Registry     // private registry, no global state
}
// NewProxyMetrics creates all proxy metrics on a private registry (avoiding
// the global default registry), registers them, and pins the build_info
// gauge to 1 for the given version label.
func NewProxyMetrics(version string) *ProxyMetrics {
	reg := prometheus.NewRegistry()

	pm := &ProxyMetrics{
		rpcRequests: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "pulse_proxy_rpc_requests_total",
				Help: "Total RPC requests handled by method and result.",
			},
			[]string{"method", "result"},
		),
		rpcLatency: prometheus.NewHistogramVec(
			prometheus.HistogramOpts{
				Name:    "pulse_proxy_rpc_latency_seconds",
				Help:    "RPC handler latency.",
				Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2.5, 5},
			},
			[]string{"method"},
		),
		sshRequests: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "pulse_proxy_ssh_requests_total",
				Help: "SSH command executions by node and result.",
			},
			[]string{"node", "result"},
		),
		// SSH buckets are coarser than RPC buckets: remote commands include
		// network and sshd overhead, so latencies run into the seconds.
		sshLatency: prometheus.NewHistogramVec(
			prometheus.HistogramOpts{
				Name:    "pulse_proxy_ssh_latency_seconds",
				Help:    "SSH command latency per node.",
				Buckets: []float64{0.1, 0.5, 1, 2.5, 5, 10, 30},
			},
			[]string{"node"},
		),
		queueDepth: prometheus.NewGauge(
			prometheus.GaugeOpts{
				Name: "pulse_proxy_queue_depth",
				Help: "Concurrent RPC requests being processed.",
			},
		),
		rateLimitHits: prometheus.NewCounter(
			prometheus.CounterOpts{
				Name: "pulse_proxy_rate_limit_hits_total",
				Help: "Number of RPC requests rejected due to rate limiting.",
			},
		),
		buildInfo: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "pulse_proxy_build_info",
				Help: "Proxy build metadata.",
			},
			[]string{"version"},
		),
		registry: reg,
	}

	reg.MustRegister(
		pm.rpcRequests,
		pm.rpcLatency,
		pm.sshRequests,
		pm.sshLatency,
		pm.queueDepth,
		pm.rateLimitHits,
		pm.buildInfo,
	)

	pm.buildInfo.WithLabelValues(version).Set(1)

	return pm
}
// Start starts the metrics HTTP server on the specified address.
// "" or "disabled" (case-insensitive) turns the endpoint off; "default"
// maps to defaultMetricsAddr. The listener is bound synchronously so bind
// errors reach the caller; serving then continues in a goroutine.
func (m *ProxyMetrics) Start(addr string) error {
	if addr == "" || strings.ToLower(addr) == "disabled" {
		log.Info().Msg("Metrics server disabled")
		return nil
	}

	if addr == "default" {
		addr = defaultMetricsAddr
	}

	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))

	ln, err := net.Listen("tcp", addr)
	if err != nil {
		return err
	}

	m.server = &http.Server{
		Addr:              addr,
		Handler:           mux,
		ReadHeaderTimeout: 5 * time.Second,
	}

	go func() {
		// ErrServerClosed is the normal Shutdown path, not a failure.
		if err := m.server.Serve(ln); err != nil && err != http.ErrServerClosed {
			log.Error().Err(err).Str("addr", addr).Msg("Metrics server stopped unexpectedly")
		}
	}()

	log.Info().Str("addr", addr).Msg("Metrics server started")
	return nil
}
// Shutdown gracefully stops the metrics HTTP server, honoring ctx as the
// drain deadline. Safe to call when the server was never started.
func (m *ProxyMetrics) Shutdown(ctx context.Context) {
	if m.server == nil {
		return
	}
	_ = m.server.Shutdown(ctx)
}
// sanitizeNodeLabel converts a node name into a safe Prometheus label value:
// lowercase, restricted to [a-z0-9._-] (anything else becomes '_'), capped
// at 63 bytes, with "unknown" substituted for an empty result.
func sanitizeNodeLabel(node string) string {
	const maxLen = 63

	var b strings.Builder
	b.Grow(len(node))
	for _, r := range strings.ToLower(node) {
		switch {
		case r >= 'a' && r <= 'z',
			r >= '0' && r <= '9',
			r == '-', r == '_', r == '.':
			b.WriteRune(r)
		default:
			b.WriteRune('_')
		}
	}

	label := b.String()
	if label == "" {
		return "unknown"
	}
	if len(label) > maxLen {
		// Safe to cut on a byte boundary: every emitted rune is ASCII.
		label = label[:maxLen]
	}
	return label
}

View file

@ -0,0 +1,242 @@
package main
import (
"bytes"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/rs/zerolog/log"
)
// execCommand runs cmd through `sh -c` and returns the combined
// stdout+stderr output. The error is non-nil when the shell cannot be
// spawned or the command exits non-zero.
func execCommand(cmd string) (string, error) {
	shell := exec.Command("sh", "-c", cmd)
	output, err := shell.CombinedOutput()
	return string(output), err
}
// getPublicKey reads the SSH public key from the default key directory
// (p.sshKeyPath); thin wrapper over getPublicKeyFrom.
func (p *Proxy) getPublicKey() (string, error) {
	return p.getPublicKeyFrom(p.sshKeyPath)
}
// getPublicKeyFrom reads id_ed25519.pub from keyDir and returns its content
// with surrounding whitespace (notably the trailing newline) stripped.
func (p *Proxy) getPublicKeyFrom(keyDir string) (string, error) {
	raw, err := os.ReadFile(filepath.Join(keyDir, "id_ed25519.pub"))
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(raw)), nil
}
// buildAuthorizedKey constructs an authorized_keys entry that pins the key
// to the configured source subnets (from= clause) and locks it down to a
// single forced command with all forwarding and PTY allocation disabled.
// Returns an error when no subnets are configured or detected.
func (p *Proxy) buildAuthorizedKey(pubKey string) (string, error) {
	// Comment helps identify and upgrade this key later
	const comment = "pulse-sensor-proxy"
	// Forced command with all restrictions
	const forced = `command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty`

	subnets := p.config.AllowedSourceSubnets
	if len(subnets) == 0 {
		return "", fmt.Errorf("no allowed source subnets configured or detected")
	}

	// Format: from="...",command="...",no-* ssh-ed25519 AAAA... pulse-sensor-proxy
	entry := fmt.Sprintf(`from="%s",%s %s %s`, strings.Join(subnets, ","), forced, pubKey, comment)
	return entry, nil
}
// pushSSHKeyFrom pushes the public key stored in keyDir to nodeHost's root
// authorized_keys, replacing any previous pulse-temp-proxy /
// pulse-sensor-proxy entries and embedding from= source-IP restrictions.
// Steps:
//  1. read the public key and build the restricted entry;
//  2. if the exact entry is already installed, do nothing;
//  3. otherwise strip old proxy keys into a temp file, append the new
//     entry, and atomically move it into place.
//
// Each call records exactly one ssh request/latency observation via a
// single deferred recorder, replacing the six duplicated inline recording
// sites in the previous version.
func (p *Proxy) pushSSHKeyFrom(nodeHost, keyDir string) error {
	startTime := time.Now()
	nodeLabel := sanitizeNodeLabel(nodeHost)

	// Deferred metric recording: "error" unless the happy paths flip it.
	result := "error"
	defer func() {
		p.metrics.sshRequests.WithLabelValues(nodeLabel, result).Inc()
		p.metrics.sshLatency.WithLabelValues(nodeLabel).Observe(time.Since(startTime).Seconds())
	}()

	pubKey, err := p.getPublicKeyFrom(keyDir)
	if err != nil {
		return fmt.Errorf("failed to get public key from %s: %w", keyDir, err)
	}

	// Build the restricted authorized_keys entry
	entry, err := p.buildAuthorizedKey(pubKey)
	if err != nil {
		return fmt.Errorf("failed to build authorized key: %w", err)
	}

	// Check if the exact restricted entry already exists
	checkCmd := fmt.Sprintf(
		`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "grep -F '%s' /root/.ssh/authorized_keys 2>/dev/null"`,
		nodeHost,
		entry,
	)
	if output, _ := execCommand(checkCmd); strings.Contains(output, entry) {
		log.Debug().Str("node", nodeHost).Msg("SSH key already present with from= restrictions")
		result = "success"
		return nil
	}

	// Remove old pulse-temp-proxy and pulse-sensor-proxy entries (for upgrade path)
	removeOldCmd := fmt.Sprintf(
		`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "mkdir -p /root/.ssh && chmod 700 /root/.ssh && grep -v -e 'pulse-temp-proxy$' -e 'pulse-sensor-proxy$' /root/.ssh/authorized_keys > /root/.ssh/authorized_keys.tmp 2>/dev/null || touch /root/.ssh/authorized_keys.tmp"`,
		nodeHost,
	)
	if _, err := execCommand(removeOldCmd); err != nil {
		return fmt.Errorf("failed to prepare authorized_keys on %s: %w", nodeHost, err)
	}

	// Add the new restricted key and atomically replace the file
	addCmd := fmt.Sprintf(
		`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "echo '%s' >> /root/.ssh/authorized_keys.tmp && mv /root/.ssh/authorized_keys.tmp /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"`,
		nodeHost,
		entry,
	)
	if _, err := execCommand(addCmd); err != nil {
		return fmt.Errorf("failed to add SSH key to %s: %w", nodeHost, err)
	}

	log.Info().
		Str("node", nodeHost).
		Str("key_dir", keyDir).
		Strs("allowed_subnets", p.config.AllowedSourceSubnets).
		Msg("SSH key installed with from= IP restrictions")

	result = "success"
	return nil
}
// pushSSHKey adds the proxy's public key to a node's authorized_keys with IP
// restrictions, sourcing the key from the active key directory (p.sshKeyPath).
// Automatically upgrades old keys without from= restrictions.
func (p *Proxy) pushSSHKey(nodeHost string) error {
	return p.pushSSHKeyFrom(nodeHost, p.sshKeyPath)
}
// testSSHConnection verifies SSH connectivity to a node with the proxy key.
// The remote forced command runs "sensors -j" regardless of what we pass,
// so only the success of the connection itself is asserted. Outcome and
// latency are recorded in the per-node SSH metrics.
func (p *Proxy) testSSHConnection(nodeHost string) error {
	start := time.Now()
	label := sanitizeNodeLabel(nodeHost)
	result := "success"
	// One request counter increment and one latency sample per call.
	defer func() {
		p.metrics.sshRequests.WithLabelValues(label, result).Inc()
		p.metrics.sshLatency.WithLabelValues(label).Observe(time.Since(start).Seconds())
	}()
	keyFile := filepath.Join(p.sshKeyPath, "id_ed25519")
	sshCmd := fmt.Sprintf(
		`ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s "echo test"`,
		keyFile,
		nodeHost,
	)
	if output, err := execCommand(sshCmd); err != nil {
		result = "error"
		return fmt.Errorf("SSH test failed: %w (output: %s)", err, output)
	}
	return nil
}
// getTemperatureViaSSH fetches temperature JSON from a node. The
// authorized_keys entry forces "sensors -j" on the remote side, so an empty
// command is sent and the sensors output comes back on stdout. Outcome and
// latency are recorded in the per-node SSH metrics.
func (p *Proxy) getTemperatureViaSSH(nodeHost string) (string, error) {
	start := time.Now()
	label := sanitizeNodeLabel(nodeHost)
	keyFile := filepath.Join(p.sshKeyPath, "id_ed25519")
	sshCmd := fmt.Sprintf(
		`ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s ""`,
		keyFile,
		nodeHost,
	)
	output, err := execCommand(sshCmd)
	result := "success"
	if err != nil {
		result = "error"
	}
	p.metrics.sshRequests.WithLabelValues(label, result).Inc()
	p.metrics.sshLatency.WithLabelValues(label).Observe(time.Since(start).Seconds())
	if err != nil {
		return "", fmt.Errorf("failed to fetch temperatures: %w", err)
	}
	return output, nil
}
// discoverClusterNodes lists the members of the local Proxmox cluster by
// parsing `pvecm nodes` output. It fails when not running on a Proxmox host
// or when no node rows can be parsed.
func discoverClusterNodes() ([]string, error) {
	// pvecm only exists on Proxmox hosts; bail out early elsewhere.
	if _, err := exec.LookPath("pvecm"); err != nil {
		return nil, fmt.Errorf("pvecm not found - not running on Proxmox host")
	}
	pvecm := exec.Command("pvecm", "nodes")
	var stdout bytes.Buffer
	pvecm.Stdout = &stdout
	if err := pvecm.Run(); err != nil {
		return nil, fmt.Errorf("failed to get cluster nodes: %w", err)
	}
	// Expected layout:
	//   Membership information
	//   ----------------------
	//       Nodeid      Votes Name
	//            1          1 node1
	//            2          1 node2
	var nodes []string
	for _, line := range strings.Split(stdout.String(), "\n") {
		cols := strings.Fields(line)
		if len(cols) < 3 {
			continue // header, separator, or blank line
		}
		// Data rows start with a numeric node ID; header rows do not.
		if c := cols[0][0]; c >= '0' && c <= '9' {
			nodes = append(nodes, cols[2])
		}
	}
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no cluster nodes found")
	}
	return nodes, nil
}
// isProxmoxHost reports whether this machine looks like a Proxmox host,
// detected via the pvecm binary or the /etc/pve configuration directory.
func isProxmoxHost() bool {
	if _, err := exec.LookPath("pvecm"); err == nil {
		return true
	}
	info, err := os.Stat("/etc/pve")
	return err == nil && info.IsDir()
}

View file

@ -0,0 +1,140 @@
package main
import (
"sync"
"time"
"golang.org/x/time/rate"
)
// peerID identifies a connecting process by UID+PID, as reported by
// SO_PEERCRED. Used as the map key for per-peer rate-limit state.
type peerID struct {
	uid uint32 // effective UID of the peer process
	pid uint32 // PID of the peer process
}
// limiterEntry holds rate limiting and concurrency controls for one peer.
type limiterEntry struct {
	limiter   *rate.Limiter // throughput: 20/min with burst 10
	semaphore chan struct{} // concurrency: cap 10
	lastSeen  time.Time     // last request time; used by cleanupLoop eviction
}
// rateLimiter manages per-peer rate limits and concurrency caps.
// All access to entries is guarded by mu.
type rateLimiter struct {
	mu       sync.Mutex
	entries  map[peerID]*limiterEntry
	quitChan chan struct{} // closed by shutdown to stop cleanupLoop
}
// newRateLimiter creates a rate limiter and starts its background cleanup
// goroutine. Call shutdown to stop the goroutine.
func newRateLimiter() *rateLimiter {
	rl := &rateLimiter{
		entries:  make(map[peerID]*limiterEntry),
		quitChan: make(chan struct{}),
	}
	go rl.cleanupLoop()
	return rl
}
// allow checks whether peer id may make a request right now and, when it
// may, reserves a concurrency slot. The returned release function must be
// called when the request completes; it is nil when allowed is false.
func (rl *rateLimiter) allow(id peerID) (release func(), allowed bool) {
	rl.mu.Lock()
	entry := rl.entries[id]
	if entry == nil {
		// Lazily create per-peer state on first contact.
		entry = &limiterEntry{
			limiter:   rate.NewLimiter(rate.Every(time.Minute/20), 10), // 20/min, burst 10
			semaphore: make(chan struct{}, 10),                         // max 10 concurrent
		}
		rl.entries[id] = entry
	}
	entry.lastSeen = time.Now()
	rl.mu.Unlock()
	// Throughput check. NOTE(review): a rate token is consumed here even if
	// the concurrency check below rejects the request, so a peer pinned at
	// its concurrency cap also burns rate budget — confirm this is intended.
	if !entry.limiter.Allow() {
		return nil, false
	}
	// Non-blocking slot acquisition: reject rather than queue when the peer
	// already has the maximum number of requests in flight.
	select {
	case entry.semaphore <- struct{}{}:
		return func() { <-entry.semaphore }, true
	default:
		return nil, false // max concurrent in-flight reached
	}
}
// cleanupLoop evicts peers idle for more than ten minutes, sweeping every
// five minutes until shutdown closes quitChan.
func (rl *rateLimiter) cleanupLoop() {
	const (
		sweepInterval = 5 * time.Minute
		idleTimeout   = 10 * time.Minute
	)
	ticker := time.NewTicker(sweepInterval)
	defer ticker.Stop()
	for {
		select {
		case <-rl.quitChan:
			return
		case <-ticker.C:
			rl.mu.Lock()
			for id, entry := range rl.entries {
				if time.Since(entry.lastSeen) > idleTimeout {
					delete(rl.entries, id)
				}
			}
			rl.mu.Unlock()
		}
	}
}
// shutdown stops the background cleanup loop by closing quitChan.
// Must be called at most once.
func (rl *rateLimiter) shutdown() {
	close(rl.quitChan)
}
// nodeGate controls per-node concurrency for temperature requests, allowing
// at most one in-flight SSH fetch per node. mu guards the inFlight map.
type nodeGate struct {
	mu       sync.Mutex
	inFlight map[string]*nodeLock
}
// nodeLock tracks interest in a specific node. refCount counts callers that
// hold or await the lock; guard is a single-slot channel used as a mutex.
type nodeLock struct {
	refCount int
	guard    chan struct{}
}
// newNodeGate creates an empty node concurrency gate.
func newNodeGate() *nodeGate {
	return &nodeGate{
		inFlight: make(map[string]*nodeLock),
	}
}
// acquire blocks until the caller holds exclusive access to node, then
// returns a release function that must be invoked exactly once when done.
func (g *nodeGate) acquire(node string) func() {
	// Register interest under the map lock so the entry cannot be removed
	// while we are still waiting on it.
	g.mu.Lock()
	nl, ok := g.inFlight[node]
	if !ok {
		// Buffered single-slot channel acts as a mutex for this node.
		nl = &nodeLock{guard: make(chan struct{}, 1)}
		g.inFlight[node] = nl
	}
	nl.refCount++
	g.mu.Unlock()

	// Block until the single slot is ours.
	nl.guard <- struct{}{}

	return func() {
		// Give up the slot first so any waiter can proceed immediately.
		<-nl.guard
		g.mu.Lock()
		defer g.mu.Unlock()
		if nl.refCount--; nl.refCount == 0 {
			// Last interested caller: drop the bookkeeping entry.
			delete(g.inFlight, node)
		}
	}
}

View file

@ -0,0 +1,33 @@
package main
import (
"fmt"
"regexp"
"github.com/google/uuid"
)
var (
	// nodeNameRegex validates node names: 1-64 characters drawn from
	// alphanumerics, dots, underscores, and hyphens. Anchored so the whole
	// string must match.
	nodeNameRegex = regexp.MustCompile(`^[a-zA-Z0-9._-]{1,64}$`)
)
// sanitizeCorrelationID returns id when it is a well-formed UUID, and a
// freshly generated UUID otherwise (including for the empty string). The
// result is therefore always safe to log and propagate.
func sanitizeCorrelationID(id string) string {
	if id != "" {
		if _, err := uuid.Parse(id); err == nil {
			return id
		}
	}
	return uuid.NewString()
}
// validateNodeName checks that name is a plausible node identifier:
// 1-64 characters drawn from [a-zA-Z0-9._-]. The offending value is quoted
// in the error (via %q, which escapes control characters) so rejected
// requests are easy to diagnose from logs.
func validateNodeName(name string) error {
	if !nodeNameRegex.MatchString(name) {
		return fmt.Errorf("invalid node name %q", name)
	}
	return nil
}

View file

@ -1,434 +0,0 @@
package main
import (
	"encoding/json"
	"errors"
	"fmt"
	"net"
	"os"
	"os/signal"
	"path/filepath"
	"syscall"

	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
	"github.com/spf13/cobra"
)
// Version information (set at build time with -ldflags)
var (
	Version   = "dev"     // release version; "dev" for local builds
	BuildTime = "unknown" // build timestamp injected at build time
	GitCommit = "unknown" // git commit hash injected at build time
)
const (
	// defaultSocketPath is the unix socket Pulse connects to when
	// PULSE_TEMP_PROXY_SOCKET is not set.
	defaultSocketPath = "/run/pulse-temp-proxy/pulse-temp-proxy.sock"
	// defaultSSHKeyPath is where the proxy keeps its SSH keypair when
	// PULSE_TEMP_PROXY_SSH_DIR is not set.
	defaultSSHKeyPath = "/var/lib/pulse-temp-proxy/ssh"
)
// rootCmd is the top-level cobra command; running it with no subcommand
// starts the proxy daemon.
var rootCmd = &cobra.Command{
	Use:     "pulse-temp-proxy",
	Short:   "Pulse Temperature Proxy - Secure SSH bridge for containerized Pulse",
	Long:    `Temperature monitoring proxy that keeps SSH keys on the host and exposes temperature data via unix socket`,
	Version: Version,
	Run: func(cmd *cobra.Command, args []string) {
		runProxy()
	},
}
// versionCmd prints the build version, and — when set at build time —
// the build timestamp and git commit.
var versionCmd = &cobra.Command{
	Use:   "version",
	Short: "Print version information",
	Run: func(cmd *cobra.Command, args []string) {
		fmt.Printf("pulse-temp-proxy %s\n", Version)
		if BuildTime != "unknown" {
			fmt.Printf("Built: %s\n", BuildTime)
		}
		if GitCommit != "unknown" {
			fmt.Printf("Commit: %s\n", GitCommit)
		}
	},
}
// init registers the version subcommand on the root command.
func init() {
	rootCmd.AddCommand(versionCmd)
}
// main dispatches to cobra; any command error is reported on stderr and
// exits with status 1.
func main() {
	if err := rootCmd.Execute(); err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
}
// Proxy manages the temperature monitoring proxy: it owns the unix socket
// listener and the on-disk SSH key directory.
type Proxy struct {
	socketPath string       // unix socket path clients connect to
	sshKeyPath string       // directory holding id_ed25519 / id_ed25519.pub
	listener   net.Listener // active unix listener; nil until Start succeeds
}
// RPC request types — the method names accepted over the unix socket.
const (
	RPCEnsureClusterKeys = "ensure_cluster_keys" // discover cluster and push SSH keys
	RPCRegisterNodes     = "register_nodes"      // discover nodes and test SSH connectivity
	RPCGetTemperature    = "get_temperature"     // fetch sensors output for one node
	RPCGetStatus         = "get_status"          // report version and public key
)
// RPCRequest represents a request from Pulse, decoded as JSON from the
// unix socket connection.
type RPCRequest struct {
	Method string                 `json:"method"` // one of the RPC* constants
	Params map[string]interface{} `json:"params"` // method-specific parameters
}
// RPCResponse represents a response to Pulse. Exactly one of Data or Error
// is meaningful depending on Success.
type RPCResponse struct {
	Success bool        `json:"success"`
	Data    interface{} `json:"data,omitempty"`
	Error   string      `json:"error,omitempty"`
}
// runProxy wires up logging, resolves socket and key paths from the
// environment, starts the proxy, and blocks until SIGINT/SIGTERM arrives.
func runProxy() {
	zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})

	// Environment overrides with built-in defaults.
	envOr := func(key, fallback string) string {
		if v := os.Getenv(key); v != "" {
			return v
		}
		return fallback
	}
	socketPath := envOr("PULSE_TEMP_PROXY_SOCKET", defaultSocketPath)
	sshKeyPath := envOr("PULSE_TEMP_PROXY_SSH_DIR", defaultSSHKeyPath)

	log.Info().
		Str("socket", socketPath).
		Str("ssh_key_dir", sshKeyPath).
		Msg("Starting pulse-temp-proxy")

	proxy := &Proxy{
		socketPath: socketPath,
		sshKeyPath: sshKeyPath,
	}
	if err := proxy.Start(); err != nil {
		log.Fatal().Err(err).Msg("Failed to start proxy")
	}

	// Block until an interrupt or termination signal, then shut down cleanly.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
	<-sigChan
	log.Info().Msg("Shutting down proxy...")
	proxy.Stop()
	log.Info().Msg("Proxy stopped")
}
// Start initializes and starts the proxy: it prepares the SSH key material,
// binds the unix socket, and launches the accept loop. The setup order
// matters — keys must exist before clients can trigger key pushes, and the
// stale socket must be removed before Listen can bind.
func (p *Proxy) Start() error {
	// Create SSH key directory if it doesn't exist (owner-only access).
	if err := os.MkdirAll(p.sshKeyPath, 0700); err != nil {
		return fmt.Errorf("failed to create SSH key directory: %w", err)
	}
	// Ensure SSH keypair exists, generating one on first run.
	if err := p.ensureSSHKeypair(); err != nil {
		return fmt.Errorf("failed to ensure SSH keypair: %w", err)
	}
	// Remove existing socket if it exists (left over from a previous run).
	// NOTE(review): os.RemoveAll would also delete a directory at this path —
	// os.Remove would be the safer choice; confirm the path can never be a dir.
	if err := os.RemoveAll(p.socketPath); err != nil {
		return fmt.Errorf("failed to remove existing socket: %w", err)
	}
	// Create socket directory if needed.
	socketDir := filepath.Dir(p.socketPath)
	if err := os.MkdirAll(socketDir, 0755); err != nil {
		return fmt.Errorf("failed to create socket directory: %w", err)
	}
	// Create unix socket listener.
	listener, err := net.Listen("unix", p.socketPath)
	if err != nil {
		return fmt.Errorf("failed to create unix socket: %w", err)
	}
	p.listener = listener
	// Set socket permissions to owner+group only.
	// We use SO_PEERCRED for authentication, so we don't need world-readable.
	if err := os.Chmod(p.socketPath, 0660); err != nil {
		log.Warn().Err(err).Msg("Failed to set socket permissions")
	}
	log.Info().Str("socket", p.socketPath).Msg("Unix socket ready")
	// Start accepting connections in the background.
	go p.acceptConnections()
	return nil
}
// Stop shuts down the proxy by closing the listener (which unblocks the
// accept loop) and removing the socket file.
func (p *Proxy) Stop() {
	if p.listener != nil {
		p.listener.Close()
		os.Remove(p.socketPath)
	}
}
// acceptConnections accepts client connections on the unix socket and
// dispatches each one to its own goroutine, until the listener is closed.
func (p *Proxy) acceptConnections() {
	for {
		conn, err := p.listener.Accept()
		if err != nil {
			// A closed listener is the normal shutdown path, not an error.
			// errors.Is(err, net.ErrClosed) also matches wrapped errors,
			// unlike the fragile error-string comparison it replaces.
			if errors.Is(err, net.ErrClosed) {
				return
			}
			log.Error().Err(err).Msg("Failed to accept connection")
			continue
		}
		go p.handleConnection(conn)
	}
}
// handleConnection processes a single RPC request on conn: authenticate the
// peer via SO_PEERCRED, decode one JSON request, dispatch to the matching
// handler, and write one JSON response. The connection is closed when done
// (one request per connection).
func (p *Proxy) handleConnection(conn net.Conn) {
	defer conn.Close()
	// Verify peer credentials (SO_PEERCRED authentication); reject
	// unauthorized processes before reading any request data.
	if err := verifyPeerCredentials(conn); err != nil {
		log.Warn().Err(err).Msg("Unauthorized connection attempt")
		p.sendError(conn, "unauthorized")
		return
	}
	// Decode a single JSON request from the connection.
	var req RPCRequest
	decoder := json.NewDecoder(conn)
	if err := decoder.Decode(&req); err != nil {
		log.Error().Err(err).Msg("Failed to decode RPC request")
		p.sendError(conn, "invalid request format")
		return
	}
	log.Debug().Str("method", req.Method).Msg("Received RPC request")
	// Route to the handler for the requested method.
	var resp RPCResponse
	switch req.Method {
	case RPCGetStatus:
		resp = p.handleGetStatus(req)
	case RPCEnsureClusterKeys:
		resp = p.handleEnsureClusterKeys(req)
	case RPCRegisterNodes:
		resp = p.handleRegisterNodes(req)
	case RPCGetTemperature:
		resp = p.handleGetTemperature(req)
	default:
		resp = RPCResponse{
			Success: false,
			Error:   fmt.Sprintf("unknown method: %s", req.Method),
		}
	}
	// Send the response back to the client.
	encoder := json.NewEncoder(conn)
	if err := encoder.Encode(resp); err != nil {
		log.Error().Err(err).Msg("Failed to encode RPC response")
	}
}
// sendError writes a failure RPCResponse with the given message to conn.
// Encoding failures are logged rather than silently ignored, matching the
// error handling in handleConnection.
func (p *Proxy) sendError(conn net.Conn, message string) {
	resp := RPCResponse{
		Success: false,
		Error:   message,
	}
	if err := json.NewEncoder(conn).Encode(resp); err != nil {
		log.Error().Err(err).Msg("Failed to encode error response")
	}
}
// handleGetStatus returns proxy status: the build version, the SSH public
// key read from disk, and the key directory path.
func (p *Proxy) handleGetStatus(req RPCRequest) RPCResponse {
	pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
	pubKey, err := os.ReadFile(pubKeyPath)
	if err != nil {
		return RPCResponse{
			Success: false,
			Error:   fmt.Sprintf("failed to read public key: %v", err),
		}
	}
	return RPCResponse{
		Success: true,
		Data: map[string]interface{}{
			"version":    Version,
			"public_key": string(pubKey),
			"ssh_dir":    p.sshKeyPath,
		},
	}
}
// ensureSSHKeypair generates an ed25519 SSH keypair in p.sshKeyPath if one
// does not already exist (both the private and public files must be present).
//
// NOTE(review): privKeyPath is interpolated unquoted into a shell command;
// a key directory containing spaces or shell metacharacters would break or
// alter the command — confirm the path is always operator-controlled.
func (p *Proxy) ensureSSHKeypair() error {
	privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
	pubKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519.pub")
	// Check if keypair already exists (both halves required).
	if _, err := os.Stat(privKeyPath); err == nil {
		if _, err := os.Stat(pubKeyPath); err == nil {
			log.Info().Msg("SSH keypair already exists")
			return nil
		}
	}
	log.Info().Msg("Generating new SSH keypair")
	// Generate ed25519 keypair using ssh-keygen with an empty passphrase;
	// the key comment identifies the entry on remote nodes.
	cmd := fmt.Sprintf("ssh-keygen -t ed25519 -f %s -N '' -C 'pulse-temp-proxy'", privKeyPath)
	if output, err := execCommand(cmd); err != nil {
		return fmt.Errorf("failed to generate SSH keypair: %w (output: %s)", err, output)
	}
	log.Info().Str("path", privKeyPath).Msg("SSH keypair generated")
	return nil
}
// handleEnsureClusterKeys discovers all Proxmox cluster nodes and pushes the
// proxy's SSH public key to each. The response reports per-node results and
// an aggregate success count; the call as a whole succeeds even when some
// individual pushes fail (callers inspect the per-node results).
func (p *Proxy) handleEnsureClusterKeys(req RPCRequest) RPCResponse {
	// Cluster discovery requires Proxmox tooling on this host.
	if !isProxmoxHost() {
		return RPCResponse{
			Success: false,
			Error:   "not running on Proxmox host - cannot discover cluster",
		}
	}
	// Discover cluster nodes via pvecm.
	nodes, err := discoverClusterNodes()
	if err != nil {
		return RPCResponse{
			Success: false,
			Error:   fmt.Sprintf("failed to discover cluster: %v", err),
		}
	}
	log.Info().Strs("nodes", nodes).Msg("Discovered cluster nodes")
	// Push the SSH key to each node, collecting per-node outcomes.
	results := make(map[string]interface{})
	successCount := 0
	for _, node := range nodes {
		log.Info().Str("node", node).Msg("Pushing SSH key to node")
		if err := p.pushSSHKey(node); err != nil {
			log.Error().Err(err).Str("node", node).Msg("Failed to push SSH key")
			results[node] = map[string]interface{}{
				"success": false,
				"error":   err.Error(),
			}
		} else {
			log.Info().Str("node", node).Msg("SSH key pushed successfully")
			results[node] = map[string]interface{}{
				"success": true,
			}
			successCount++
		}
	}
	return RPCResponse{
		Success: true,
		Data: map[string]interface{}{
			"nodes":         nodes,
			"results":       results,
			"success_count": successCount,
			"total_count":   len(nodes),
		},
	}
}
// handleRegisterNodes discovers the cluster nodes and reports, for each,
// whether an SSH connection with the proxy key currently succeeds.
func (p *Proxy) handleRegisterNodes(req RPCRequest) RPCResponse {
	// Discovery requires Proxmox tooling on this host.
	if !isProxmoxHost() {
		return RPCResponse{
			Success: false,
			Error:   "not running on Proxmox host",
		}
	}
	// Discover cluster nodes via pvecm.
	nodes, err := discoverClusterNodes()
	if err != nil {
		return RPCResponse{
			Success: false,
			Error:   fmt.Sprintf("failed to discover nodes: %v", err),
		}
	}
	// Test SSH connectivity to each node; failures are reported per node
	// rather than failing the whole call.
	nodeStatus := make([]map[string]interface{}, 0, len(nodes))
	for _, node := range nodes {
		status := map[string]interface{}{
			"name": node,
		}
		if err := p.testSSHConnection(node); err != nil {
			status["ssh_ready"] = false
			status["error"] = err.Error()
		} else {
			status["ssh_ready"] = true
		}
		nodeStatus = append(nodeStatus, status)
	}
	return RPCResponse{
		Success: true,
		Data: map[string]interface{}{
			"nodes": nodeStatus,
		},
	}
}
// handleGetTemperature fetches temperature data from one node via SSH.
// Requires a string "node" parameter; the raw sensors output is returned
// unparsed in the "temperature" field.
func (p *Proxy) handleGetTemperature(req RPCRequest) RPCResponse {
	// Extract and validate the required "node" parameter.
	nodeParam, ok := req.Params["node"]
	if !ok {
		return RPCResponse{
			Success: false,
			Error:   "missing 'node' parameter",
		}
	}
	node, ok := nodeParam.(string)
	if !ok {
		return RPCResponse{
			Success: false,
			Error:   "'node' parameter must be a string",
		}
	}
	// Fetch temperature data over SSH (remote forced command runs sensors -j).
	tempData, err := p.getTemperatureViaSSH(node)
	if err != nil {
		return RPCResponse{
			Success: false,
			Error:   fmt.Sprintf("failed to get temperatures: %v", err),
		}
	}
	return RPCResponse{
		Success: true,
		Data: map[string]interface{}{
			"node":        node,
			"temperature": tempData,
		},
	}
}

View file

@ -1,161 +0,0 @@
package main
import (
"bytes"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
)
// execCommand runs cmd through "sh -c" and returns its combined
// stdout+stderr text along with any execution error.
func execCommand(cmd string) (string, error) {
	output, err := exec.Command("sh", "-c", cmd).CombinedOutput()
	return string(output), err
}
// getPublicKey reads the proxy's SSH public key from the key directory and
// returns it with surrounding whitespace trimmed.
func (p *Proxy) getPublicKey() (string, error) {
	raw, err := os.ReadFile(filepath.Join(p.sshKeyPath, "id_ed25519.pub"))
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(raw)), nil
}
// pushSSHKey adds the proxy's public key to a node's authorized_keys with a
// forced "sensors -j" command and channel restrictions (no forwarding, no
// PTY). Unlike later revisions, this version applies no from= source-IP
// restriction.
//
// NOTE(review): authorizedKey contains double quotes and is interpolated
// into a double-quoted remote shell command; this relies on delicate nested
// quoting — verify entries survive intact. The existence check and the
// append also race with concurrent writers (duplicate entries possible).
func (p *Proxy) pushSSHKey(nodeHost string) error {
	pubKey, err := p.getPublicKey()
	if err != nil {
		return fmt.Errorf("failed to get public key: %w", err)
	}
	// Create forced command entry with restrictions.
	// This limits the key to only running "sensors -j".
	authorizedKey := fmt.Sprintf(`command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty %s`, pubKey)
	// Build SSH command to add key to remote node.
	// First, check if key already exists to avoid duplicates (matched by
	// the bare key material, so restriction changes are not re-applied).
	checkCmd := fmt.Sprintf(
		`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "grep -F '%s' /root/.ssh/authorized_keys 2>/dev/null"`,
		nodeHost,
		pubKey,
	)
	if output, _ := execCommand(checkCmd); strings.Contains(output, pubKey) {
		return nil // Key already exists
	}
	// Add the key, creating ~/.ssh with correct permissions if needed.
	addCmd := fmt.Sprintf(
		`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@%s "mkdir -p /root/.ssh && chmod 700 /root/.ssh && echo '%s' >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"`,
		nodeHost,
		authorizedKey,
	)
	if _, err := execCommand(addCmd); err != nil {
		return fmt.Errorf("failed to add SSH key to %s: %w", nodeHost, err)
	}
	return nil
}
// testSSHConnection verifies SSH connectivity to a node using the proxy key.
// The remote forced command replaces "echo test" with "sensors -j", so only
// the success of the connection itself is asserted.
func (p *Proxy) testSSHConnection(nodeHost string) error {
	keyFile := filepath.Join(p.sshKeyPath, "id_ed25519")
	sshCmd := fmt.Sprintf(
		`ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s "echo test"`,
		keyFile,
		nodeHost,
	)
	if output, err := execCommand(sshCmd); err != nil {
		return fmt.Errorf("SSH test failed: %w (output: %s)", err, output)
	}
	return nil
}
// getTemperatureViaSSH fetches temperature JSON from a node. The remote
// authorized_keys entry forces "sensors -j", so an empty command is sent
// and the sensors output arrives on stdout.
func (p *Proxy) getTemperatureViaSSH(nodeHost string) (string, error) {
	keyFile := filepath.Join(p.sshKeyPath, "id_ed25519")
	sshCmd := fmt.Sprintf(
		`ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s ""`,
		keyFile,
		nodeHost,
	)
	output, err := execCommand(sshCmd)
	if err != nil {
		return "", fmt.Errorf("failed to fetch temperatures: %w", err)
	}
	return output, nil
}
// discoverClusterNodes discovers all nodes in the Proxmox cluster by
// parsing `pvecm nodes` output. Returns an error when not on a Proxmox
// host or when no node rows can be parsed.
func discoverClusterNodes() ([]string, error) {
	// Check if pvecm is available (only on Proxmox hosts).
	if _, err := exec.LookPath("pvecm"); err != nil {
		return nil, fmt.Errorf("pvecm not found - not running on Proxmox host")
	}
	// Get cluster node list (stdout only; stderr is discarded).
	cmd := exec.Command("pvecm", "nodes")
	var out bytes.Buffer
	cmd.Stdout = &out
	if err := cmd.Run(); err != nil {
		return nil, fmt.Errorf("failed to get cluster nodes: %w", err)
	}
	// Parse output
	// Format:
	// Membership information
	// ----------------------
	//     Nodeid      Votes Name
	//          1          1 node1
	//          2          1 node2
	var nodes []string
	lines := strings.Split(out.String(), "\n")
	for _, line := range lines {
		fields := strings.Fields(line)
		// Skip header lines and empty lines (fewer than 3 columns).
		if len(fields) < 3 {
			continue
		}
		// Check if first field is numeric (node ID); header rows start
		// with letters and are skipped by this test.
		if fields[0][0] >= '0' && fields[0][0] <= '9' {
			nodeName := fields[2]
			nodes = append(nodes, nodeName)
		}
	}
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no cluster nodes found")
	}
	return nodes, nil
}
// isProxmoxHost checks if we're running on a Proxmox host, detected via
// either the pvecm binary or the /etc/pve configuration directory.
func isProxmoxHost() bool {
	// Check for pvecm command on PATH.
	if _, err := exec.LookPath("pvecm"); err == nil {
		return true
	}
	// Check for /etc/pve directory (present on PVE hosts).
	if info, err := os.Stat("/etc/pve"); err == nil && info.IsDir() {
		return true
	}
	return false
}

View file

@ -0,0 +1,962 @@
# Pulse Sensor Proxy - Security Hardening Guide
## Overview
The `pulse-sensor-proxy` is a host-side service that provides secure temperature monitoring for containerized Pulse deployments. It addresses a critical security concern: SSH keys stored inside LXC containers can be exfiltrated if the container is compromised.
**Architecture:**
- Host-side proxy runs with minimal privileges on each Proxmox node
- Containerized Pulse communicates via Unix socket (`/run/pulse-sensor-proxy/pulse-sensor-proxy.sock`)
- Proxy authenticates containers using Linux `SO_PEERCRED` (UID/PID verification)
- SSH keys never leave the host filesystem
**Threat Model:**
- ✅ Container compromise cannot access SSH keys
- ✅ Container cannot directly SSH to cluster nodes
- ✅ Rate limiting prevents abuse via socket
- ✅ IP restrictions on SSH keys limit lateral movement
- ✅ Audit logging tracks all temperature requests
## Prerequisites
- Proxmox VE 7.0+ or Proxmox Backup Server 2.0+
- LXC container running Pulse (unprivileged recommended)
- Root access to Proxmox host(s)
- `lm-sensors` installed on all nodes
- Cluster SSH access configured (root passwordless SSH between nodes)
## Host Hardening
### Service Account
The proxy runs as the `pulse-sensor-proxy` user with these characteristics:
- System account (no login shell: `/usr/sbin/nologin`)
- No home directory
- Dedicated group: `pulse-sensor-proxy`
- Owns `/var/lib/pulse-sensor-proxy` and `/run/pulse-sensor-proxy`
**Verify service account:**
```bash
# Check user exists
id pulse-sensor-proxy
# Expected output:
# uid=XXX(pulse-sensor-proxy) gid=XXX(pulse-sensor-proxy) groups=XXX(pulse-sensor-proxy)
# Check shell (should be /usr/sbin/nologin)
getent passwd pulse-sensor-proxy | cut -d: -f7
```
### Systemd Unit Security
The systemd unit includes comprehensive hardening directives:
**Key security features:**
- `User=pulse-sensor-proxy` / `Group=pulse-sensor-proxy` - Unprivileged execution
- `NoNewPrivileges=true` - Prevents privilege escalation
- `ProtectSystem=strict` - Read-only `/usr`, `/boot`, `/efi`
- `ProtectHome=true` - Inaccessible `/home`, `/root`, `/run/user`
- `PrivateTmp=true` - Isolated `/tmp` and `/var/tmp`
- `SystemCallFilter=@system-service` - Restricted syscalls
- `CapabilityBoundingSet=` - No capabilities granted
- `RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6` - Socket restrictions
**Verify systemd security:**
```bash
# Check service status
systemctl status pulse-sensor-proxy
# Verify user/group
ps aux | grep pulse-sensor-proxy | grep -v grep
# Expected: pulse-sensor-proxy user, not root
# Check systemd security settings
systemctl show pulse-sensor-proxy | grep -E '(User=|NoNewPrivileges|ProtectSystem|CapabilityBoundingSet)'
```
### File Permissions
**Critical paths and ownership:**
```
/var/lib/pulse-sensor-proxy/ pulse-sensor-proxy:pulse-sensor-proxy 0750
├── ssh/ pulse-sensor-proxy:pulse-sensor-proxy 0700
│ ├── id_ed25519 pulse-sensor-proxy:pulse-sensor-proxy 0600
│ └── id_ed25519.pub pulse-sensor-proxy:pulse-sensor-proxy 0640
└── ssh.d/ pulse-sensor-proxy:pulse-sensor-proxy 0750
├── next/ pulse-sensor-proxy:pulse-sensor-proxy 0750
└── prev/ pulse-sensor-proxy:pulse-sensor-proxy 0750
/run/pulse-sensor-proxy/ pulse-sensor-proxy:pulse-sensor-proxy 0775
└── pulse-sensor-proxy.sock pulse-sensor-proxy:pulse-sensor-proxy 0777
```
**Verify permissions:**
```bash
# Check base directory
ls -ld /var/lib/pulse-sensor-proxy/
# Expected: drwxr-x--- pulse-sensor-proxy pulse-sensor-proxy
# Check SSH keys
ls -l /var/lib/pulse-sensor-proxy/ssh/
# Expected:
# -rw------- pulse-sensor-proxy pulse-sensor-proxy id_ed25519
# -rw-r----- pulse-sensor-proxy pulse-sensor-proxy id_ed25519.pub
# Check socket directory (note: 0775 for container access)
ls -ld /run/pulse-sensor-proxy/
# Expected: drwxrwxr-x pulse-sensor-proxy pulse-sensor-proxy
```
**Why 0775 on socket directory?**
The socket directory needs `0775` (not `0770`) to allow the container's unprivileged UID (e.g., 1001) to traverse into the directory and access the socket. The socket itself is `0777` as access control is enforced via `SO_PEERCRED`.
## LXC Container Requirements
### Configuration Summary
| Setting | Value | Purpose |
|---------|-------|---------|
| `lxc.idmap` | `u 0 100000 65536`<br>`g 0 100000 65536` | Unprivileged UID/GID mapping |
| `lxc.apparmor.profile` | `generated` or custom | AppArmor confinement |
| `lxc.cap.drop` | `sys_admin` (optional) | Drop dangerous capabilities |
| `lxc.mount.entry` | Directory-level bind mount | Socket access from container |
### Sample LXC Configuration
**In `/etc/pve/lxc/<VMID>.conf`:**
```ini
# Unprivileged container (required)
unprivileged: 1
# AppArmor profile (recommended)
lxc.apparmor.profile: generated
# Drop CAP_SYS_ADMIN if feasible (optional but recommended)
# WARNING: May break some container management operations
lxc.cap.drop: sys_admin
# Bind mount proxy socket directory (REQUIRED)
# Note: Directory-level mount, not socket-level (socket is recreated by systemd)
lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0
```
**Key points:**
- **Directory-level mount**: Mount `/run/pulse-sensor-proxy` directory, not the socket file itself
- **Why directory mount?** Systemd recreates the socket on restart; socket-level mounts break on recreation
- **Mode 0775**: Socket directory needs group+other execute permissions for container UID traversal
- **Socket 0777**: Actual socket is world-writable; security enforced via `SO_PEERCRED` authentication
### Runtime Verification
**Check container is unprivileged:**
```bash
# On host
pct config <VMID> | grep unprivileged
# Expected: unprivileged: 1
# Inside container
cat /proc/self/uid_map
# Expected: 0 100000 65536 (or similar)
# NOT: 0 0 4294967295 (privileged)
```
**Check AppArmor confinement:**
```bash
# Inside container
cat /proc/self/attr/current
# Expected: lxc-<vmid>_</var/lib/lxc> (enforcing) or similar
# NOT: unconfined
```
**Check namespace isolation:**
```bash
# Inside container
ls -li /proc/self/ns/
# Each namespace should have a unique inode number, different from host
```
**Check capabilities:**
```bash
# Inside container
capsh --print | grep Current
# Should show limited capability set
# If lxc.cap.drop: sys_admin is set, CAP_SYS_ADMIN should be absent
```
**Check bind mount:**
```bash
# Inside container
ls -la /run/pulse-sensor-proxy/
# Expected: pulse-sensor-proxy.sock visible
# Test socket access (requires Pulse to attempt connection)
socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock
# Should connect (may timeout waiting for input, but connection succeeds)
```
## Key Management
### SSH Key Restrictions
All SSH keys deployed to cluster nodes include these restrictions:
- `command="sensors -j"` - Forced command (only sensors allowed)
- `from="<subnets>"` - IP address restrictions
- `no-port-forwarding` - Disable port forwarding
- `no-X11-forwarding` - Disable X11 forwarding
- `no-agent-forwarding` - Disable agent forwarding
- `no-pty` - Disable PTY allocation
**Example authorized_keys entry:**
```
from="192.168.0.0/24,10.0.0.0/8",command="sensors -j",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty ssh-ed25519 AAAA... pulse-sensor-proxy
```
**Configure allowed subnets:**
Create `/etc/pulse-sensor-proxy/config.yaml`:
```yaml
allowed_source_subnets:
- "192.168.0.0/24" # LAN subnet
- "10.0.0.0/8" # VPN subnet
```
Or use environment variable:
```bash
# In /etc/default/pulse-sensor-proxy (loaded by systemd)
PULSE_SENSOR_PROXY_ALLOWED_SUBNETS="192.168.0.0/24,10.0.0.0/8"
```
**Auto-detection:**
If no subnets are configured, the proxy auto-detects host IP addresses and uses them as `/32` (IPv4) or `/128` (IPv6) CIDRs. This is secure but brittle (breaks if host IP changes). Explicit configuration is recommended.
**Verify SSH restrictions:**
```bash
# On any cluster node
grep pulse-sensor-proxy /root/.ssh/authorized_keys
# Expected format:
# from="...",command="sensors -j",no-* ssh-ed25519 AAAA... pulse-sensor-proxy
```
### Key Rotation
**Rotation cadence:**
- Recommended: Every 90 days
- Minimum: Every 180 days
- After incident: Immediately
**Rotation workflow:**
The `pulse-sensor-proxy-rotate-keys.sh` script performs staged rotation with verification:
1. **Dry-run (recommended first):**
```bash
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run
```
Shows what would happen without making changes.
2. **Perform rotation:**
```bash
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
```
**What happens:**
- Generates new Ed25519 keypair in `/var/lib/pulse-sensor-proxy/ssh.d/next/`
- Pushes new key to all cluster nodes (via RPC `ensure_cluster_keys`)
- Verifies SSH connectivity with new key on each node
- Atomically swaps keys:
- Current `/ssh/` → `/ssh.d/prev/` (backup)
- Staging `/ssh.d/next/` → `/ssh/` (active)
- Old keys preserved in `/ssh.d/prev/` for rollback
3. **If rotation fails, rollback:**
```bash
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback
```
Restores previous keypair from `/ssh.d/prev/` and re-pushes to cluster nodes.
**Post-rotation verification:**
```bash
# Check new key timestamp
stat /var/lib/pulse-sensor-proxy/ssh/id_ed25519
# Verify all nodes have new key
for node in pve1 pve2 pve3; do
echo "=== $node ==="
ssh root@$node "grep pulse-sensor-proxy /root/.ssh/authorized_keys | tail -1"
done
# Test temperature fetch via proxy
curl -s --unix-socket /run/pulse-sensor-proxy/pulse-sensor-proxy.sock \
-d '{"correlation_id":"test","method":"get_temp","params":{"node":"pve1"}}' \
| jq .
```
### Automated Rotation (Optional)
**Create systemd timer:**
`/etc/systemd/system/pulse-sensor-proxy-key-rotation.service`:
```ini
[Unit]
Description=Rotate pulse-sensor-proxy SSH keys
After=pulse-sensor-proxy.service
Requires=pulse-sensor-proxy.service
[Service]
Type=oneshot
ExecStart=/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
StandardOutput=journal
StandardError=journal
```
`/etc/systemd/system/pulse-sensor-proxy-key-rotation.timer`:
```ini
[Unit]
Description=Rotate pulse-sensor-proxy SSH keys every 90 days
Requires=pulse-sensor-proxy-key-rotation.service
[Timer]
OnCalendar=quarterly
RandomizedDelaySec=1h
Persistent=true
[Install]
WantedBy=timers.target
```
**Enable timer:**
```bash
systemctl daemon-reload
systemctl enable --now pulse-sensor-proxy-key-rotation.timer
# Check next run
systemctl list-timers pulse-sensor-proxy-key-rotation.timer
```
## Monitoring & Auditing
### Metrics Endpoint
The proxy exposes Prometheus metrics on `127.0.0.1:9127` by default.
**Available metrics:**
- `pulse_proxy_rpc_requests_total{method, result}` - RPC request counter
- `pulse_proxy_rpc_latency_seconds{method}` - RPC handler latency histogram
- `pulse_proxy_ssh_requests_total{node, result}` - SSH request counter per node
- `pulse_proxy_ssh_latency_seconds{node}` - SSH latency histogram per node
- `pulse_proxy_queue_depth` - Concurrent RPC requests (gauge)
- `pulse_proxy_rate_limit_hits_total` - Rejected requests due to rate limiting
- `pulse_proxy_build_info{version}` - Build metadata
**Configure metrics address:**
In `/etc/default/pulse-sensor-proxy`:
```bash
# Listen on all interfaces (WARNING: exposes metrics externally)
PULSE_SENSOR_PROXY_METRICS_ADDR="0.0.0.0:9127"
# Disable metrics
PULSE_SENSOR_PROXY_METRICS_ADDR="disabled"
```
**Test metrics endpoint:**
```bash
curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy
```
### Prometheus Integration
**Sample scrape configuration:**
```yaml
scrape_configs:
- job_name: 'pulse-sensor-proxy'
static_configs:
- targets:
- 'pve1:9127'
- 'pve2:9127'
- 'pve3:9127'
relabel_configs:
- source_labels: [__address__]
regex: '([^:]+):.+'
target_label: instance
```
### Alert Rules
**Recommended Prometheus alerts:**
```yaml
groups:
- name: pulse-sensor-proxy
rules:
# High SSH failure rate
- alert: PulseProxySSHFailureRate
expr: |
rate(pulse_proxy_ssh_requests_total{result="error"}[5m]) > 0.1
for: 5m
labels:
severity: warning
annotations:
summary: "High SSH failure rate on {{ $labels.instance }}"
description: "{{ $value | humanize }} SSH requests/sec failing"
# Rate limiting active
- alert: PulseProxyRateLimiting
expr: |
rate(pulse_proxy_rate_limit_hits_total[5m]) > 0
for: 5m
labels:
severity: warning
annotations:
summary: "Rate limiting active on {{ $labels.instance }}"
description: "Proxy rejecting requests due to rate limits"
# High queue depth
- alert: PulseProxyQueueDepth
expr: pulse_proxy_queue_depth > 5
for: 5m
labels:
severity: warning
annotations:
summary: "High RPC queue depth on {{ $labels.instance }}"
description: "{{ $value }} concurrent requests (threshold: 5)"
# Proxy down
- alert: PulseProxyDown
expr: up{job="pulse-sensor-proxy"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: "Pulse proxy down on {{ $labels.instance }}"
```
### Audit Logging
**Log format:**
All RPC requests are logged with structured fields:
- `corr_id` - Correlation ID (UUID, tracks request lifecycle)
- `uid` / `pid` - Peer credentials from `SO_PEERCRED`
- `method` - RPC method called (`get_temp`, `register_nodes`, `ensure_cluster_keys`)
**Example log entries:**
```json
{"level":"info","corr_id":"a7f3d..","uid":1001,"pid":12345,"method":"get_temp","node":"pve1","msg":"RPC request"}
{"level":"info","corr_id":"a7f3d..","node":"pve1","latency_ms":245,"msg":"Temperature fetch successful"}
```
**Query logs:**
```bash
# All RPC requests in last hour
journalctl -u pulse-sensor-proxy --since "1 hour ago" -o json | \
jq -r 'select(.corr_id != null) | [.corr_id, .uid, .method, .node] | @tsv'
# Failed SSH requests
journalctl -u pulse-sensor-proxy --since today | grep -E '(SSH.*failed|error)'
# Rate limit hits
journalctl -u pulse-sensor-proxy --since today | grep "rate limit"
# Specific correlation ID
journalctl -u pulse-sensor-proxy | grep "corr_id=a7f3d"
```
### Rate Limiting
**Current limits (per peer UID+PID):**
- **Rate**: 20 requests/minute (token bucket with burst)
- **Burst**: 10 requests
- **Concurrency**: 10 simultaneous requests
**Behavior on limit exceeded:**
- Request rejected immediately (no queuing)
- `pulse_proxy_rate_limit_hits_total` metric incremented
- Log entry: `"Rate limit exceeded"`
- HTTP-like semantics: Similar to 429 Too Many Requests
**Adjust limits:**
Limits are hardcoded in `throttle.go`. To adjust, modify and rebuild:
```go
// cmd/pulse-sensor-proxy/throttle.go
const (
requestsPerMin = 20 // Change this
requestBurst = 10 // Change this
maxConcurrent = 10 // Change this
)
```
Then rebuild and restart:
```bash
go build -v ./cmd/pulse-sensor-proxy
systemctl restart pulse-sensor-proxy
```
## Incident Response
### Suspected Compromise Checklist
**If the proxy or host is suspected compromised:**
1. **Isolate immediately:**
```bash
# Stop proxy service
systemctl stop pulse-sensor-proxy
# Block outbound SSH from host (if applicable)
iptables -A OUTPUT -p tcp --dport 22 -j REJECT
```
2. **Rotate all keys:**
```bash
# Remove compromised keys from all nodes
for node in pve1 pve2 pve3; do
ssh root@$node "sed -i '/pulse-sensor-proxy/d' /root/.ssh/authorized_keys"
done
# Generate new keys (don't use rotation script - may be compromised)
rm -rf /var/lib/pulse-sensor-proxy/ssh*
mkdir -p /var/lib/pulse-sensor-proxy/ssh
ssh-keygen -t ed25519 -N '' -C "pulse-sensor-proxy emergency $(date -u +%Y%m%dT%H%M%SZ)" \
-f /var/lib/pulse-sensor-proxy/ssh/id_ed25519
chown -R pulse-sensor-proxy:pulse-sensor-proxy /var/lib/pulse-sensor-proxy/ssh
chmod 0700 /var/lib/pulse-sensor-proxy/ssh
chmod 0600 /var/lib/pulse-sensor-proxy/ssh/id_ed25519
chmod 0640 /var/lib/pulse-sensor-proxy/ssh/id_ed25519.pub
```
3. **Audit logs:**
```bash
# Export all proxy logs
journalctl -u pulse-sensor-proxy --since "7 days ago" > /tmp/proxy-audit-$(date +%s).log
# Look for anomalies:
# - Unusual correlation IDs
# - High rate limit hits
# - Unexpected UIDs/PIDs
# - SSH errors to unexpected nodes
```
4. **Reinstall proxy:**
```bash
# Re-run installation script
/opt/pulse/scripts/install-sensor-proxy.sh
# Verify service status
systemctl status pulse-sensor-proxy
```
5. **Re-push keys:**
```bash
# Use proxy RPC to push new keys
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
```
6. **Verify no persistence mechanisms:**
```bash
# Check for unexpected systemd units
systemctl list-units --all | grep -i proxy
# Check for unexpected cron jobs
crontab -u pulse-sensor-proxy -l
# Check for unauthorized files in /var/lib/pulse-sensor-proxy
find /var/lib/pulse-sensor-proxy -type f ! -path '*/ssh/*' ! -path '*/ssh.d/*'
```
### Post-Incident Hardening
After an incident, consider:
- **Audit all LXC containers** for unexpected privilege escalation
- **Review bind mounts** on all containers (check for unauthorized mounts)
- **Enable full syscall auditing** (`auditd`) on host
- **Restrict network access** to proxy metrics endpoint (firewall `127.0.0.1:9127`)
- **Implement log aggregation** (forward `journald` to central SIEM)
## Testing & Rollout
### Development Testing
Before deploying to production, verify the implementation with these safe tests:
**1. Build Verification:**
```bash
# Compile proxy
cd /opt/pulse
go build -v ./cmd/pulse-sensor-proxy
# Verify binary
./pulse-sensor-proxy version
# Expected: pulse-sensor-proxy dev (or version number)
# Check help output
./pulse-sensor-proxy --help
```
**2. Rotation Script Syntax:**
```bash
# Syntax check
bash -n /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
# Help output
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --help
# Dry-run (requires root and socket)
sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run
```
**3. Configuration Validation:**
```bash
# Test config file parsing
cat > /tmp/test-config.yaml <<EOF
allowed_source_subnets:
- "192.168.0.0/24"
- "10.0.0.0/8"
metrics_address: "127.0.0.1:9127"
EOF
# Validate YAML syntax
python3 -c "import yaml; yaml.safe_load(open('/tmp/test-config.yaml'))"
```
### Production Rollout Checklist
**Phase 1: Pre-Deployment (on host)**
1. **Backup current state:**
```bash
# Backup systemd unit
cp /etc/systemd/system/pulse-sensor-proxy.service \
/etc/systemd/system/pulse-sensor-proxy.service.backup
# Backup SSH keys
tar -czf /tmp/pulse-sensor-proxy-keys-backup-$(date +%s).tar.gz \
/var/lib/pulse-sensor-proxy/ssh/
# Note current service status
systemctl status pulse-sensor-proxy > /tmp/pulse-sensor-proxy-status-before.txt
```
2. **Create service account:**
```bash
# Run install script or manually create
if ! id -u pulse-sensor-proxy >/dev/null 2>&1; then
useradd --system --user-group --no-create-home --shell /usr/sbin/nologin pulse-sensor-proxy
fi
```
3. **Update file ownership:**
```bash
chown -R pulse-sensor-proxy:pulse-sensor-proxy /var/lib/pulse-sensor-proxy/
chmod 0750 /var/lib/pulse-sensor-proxy/
chmod 0700 /var/lib/pulse-sensor-proxy/ssh/
chmod 0600 /var/lib/pulse-sensor-proxy/ssh/id_ed25519
chmod 0640 /var/lib/pulse-sensor-proxy/ssh/id_ed25519.pub
```
**Phase 2: Deploy Hardened Version**
1. **Build and install binary:**
```bash
cd /opt/pulse
go build -v -o /tmp/pulse-sensor-proxy ./cmd/pulse-sensor-proxy
# Verify build
/tmp/pulse-sensor-proxy version
# Install
sudo install -m 0755 -o root -g root /tmp/pulse-sensor-proxy /usr/local/bin/pulse-sensor-proxy
```
2. **Install hardened systemd unit:**
```bash
# Copy hardened unit
sudo cp /opt/pulse/scripts/pulse-sensor-proxy.service /etc/systemd/system/
# Verify syntax
systemd-analyze verify /etc/systemd/system/pulse-sensor-proxy.service
# Reload systemd
sudo systemctl daemon-reload
```
3. **Update RuntimeDirectoryMode for LXC access:**
```bash
# Ensure socket directory is accessible from container
sudo mkdir -p /etc/systemd/system/pulse-sensor-proxy.service.d/
cat | sudo tee /etc/systemd/system/pulse-sensor-proxy.service.d/lxc-access.conf <<'EOF'
[Service]
RuntimeDirectoryMode=0775
EOF
sudo systemctl daemon-reload
```
**Phase 3: Restart and Verify**
1. **Restart service:**
```bash
sudo systemctl restart pulse-sensor-proxy
# Check status
sudo systemctl status pulse-sensor-proxy
```
2. **Verify service user:**
```bash
ps aux | grep pulse-sensor-proxy | grep -v grep
# Expected: pulse-sensor-proxy user, not root
```
3. **Check socket permissions:**
```bash
ls -ld /run/pulse-sensor-proxy/
# Expected: drwxrwxr-x pulse-sensor-proxy pulse-sensor-proxy
ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock
# Expected: srwxrwxrwx pulse-sensor-proxy pulse-sensor-proxy
```
4. **Test from container:**
```bash
# Inside LXC container running Pulse
ls -la /run/pulse-sensor-proxy/
# Should show socket
# Check Pulse logs for connection success
journalctl -u pulse-backend -n 50 | grep -i temperature
```
**Phase 4: End-to-End Validation**
1. **Test RPC methods:**
```bash
# On host, test socket connectivity
echo '{"correlation_id":"test-001","method":"register_nodes","params":{}}' | \
sudo socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock | jq .
# Should return cluster nodes list
```
2. **Test temperature fetch:**
```bash
# From container or via socket
echo '{"correlation_id":"test-002","method":"get_temp","params":{"node":"pve1"}}' | \
socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock | jq .
# Should return sensors JSON data
```
3. **Verify metrics endpoint:**
```bash
curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy
# Should show metrics like:
# pulse_proxy_rpc_requests_total{method="get_temp",result="success"} N
# pulse_proxy_queue_depth 0
```
4. **Test SSH key rotation:**
```bash
# Dry-run first
sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run
# Full rotation (if confident)
sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh
# Verify all nodes updated
for node in pve1 pve2 pve3; do
ssh root@$node "tail -1 /root/.ssh/authorized_keys"
done
```
5. **Audit logging verification:**
```bash
# Check logs include correlation IDs and peer credentials
sudo journalctl -u pulse-sensor-proxy --since "5 minutes ago" -o json | \
jq -r 'select(.corr_id != null) | [.corr_id, .uid, .method] | @tsv'
# Should show structured logging with UIDs
```
**Phase 5: Monitoring Setup**
1. **Configure Prometheus scraping:**
```yaml
# Add to prometheus.yml
scrape_configs:
- job_name: 'pulse-sensor-proxy'
static_configs:
- targets: ['localhost:9127']
```
2. **Import alert rules:**
```bash
# Copy alert rules from docs to Prometheus alerts directory
# Reload Prometheus configuration
```
3. **Verify alerts fire (optional stress test):**
```bash
# Generate rate limit hits (test alert)
for i in {1..50}; do
echo '{"correlation_id":"stress-'$i'","method":"register_nodes","params":{}}' | \
socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock &
done
wait
# Check rate limit metric increased
curl -s http://127.0.0.1:9127/metrics | grep rate_limit_hits
```
### Rollback Procedure
If issues occur during rollout:
1. **Stop new service:**
```bash
sudo systemctl stop pulse-sensor-proxy
```
2. **Restore backup:**
```bash
sudo cp /etc/systemd/system/pulse-sensor-proxy.service.backup \
/etc/systemd/system/pulse-sensor-proxy.service
sudo systemctl daemon-reload
```
3. **Restore SSH keys (if rotated):**
```bash
# If rotation was performed and failed
sudo /opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback
```
4. **Restart with old configuration:**
```bash
sudo systemctl restart pulse-sensor-proxy
sudo systemctl status pulse-sensor-proxy
```
5. **Verify Pulse connectivity:**
```bash
# Check Pulse can still fetch temperatures
# Monitor Pulse logs
```
### Known Limitations
- **No automated unit tests**: Code verification relies on build success and manual testing
- **Key rotation requires manual trigger**: Automated timer setup is optional
- **Metrics require Prometheus**: No built-in alerting without external monitoring
- **LXC bind mount required**: Container must have directory-level bind mount configured
- **Root required for rotation script**: Script needs root to run `ensure_cluster_keys` RPC
### Future Improvements
- Add Go unit tests for validation, throttling, and metrics logic
- Implement health check endpoint (e.g., `/health`) separate from metrics
- Add support for TLS on metrics endpoint
- Create automated integration test suite
- Add `--check` flag to rotation script for pre-flight validation
- Support for multiple LXC containers accessing same proxy instance
## Appendix
### Quick Verification Checklist
**Host:**
- [ ] Service running as `pulse-sensor-proxy` user (not root)
- [ ] Keys in `/var/lib/pulse-sensor-proxy/ssh/` owned by `pulse-sensor-proxy:pulse-sensor-proxy`
- [ ] Private key permissions: `0600`
- [ ] Socket directory permissions: `0775` (not `0770`)
- [ ] Metrics endpoint accessible: `curl http://127.0.0.1:9127/metrics`
**Container:**
- [ ] Container is unprivileged (`unprivileged: 1` in config)
- [ ] Bind mount exists: `ls /run/pulse-sensor-proxy/pulse-sensor-proxy.sock`
- [ ] AppArmor enforced: `cat /proc/self/attr/current` shows confinement
- [ ] Pulse can connect to socket (check Pulse logs)
**SSH Keys:**
- [ ] All nodes have `pulse-sensor-proxy` key in `/root/.ssh/authorized_keys`
- [ ] Keys include `from="..."` restrictions
- [ ] Keys include `command="sensors -j"` forced command
- [ ] Keys include `no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty`
**Monitoring:**
- [ ] Prometheus scraping metrics successfully
- [ ] Alerts configured for SSH failures, rate limiting, queue depth
- [ ] Logs forwarded to central logging (optional but recommended)
### Reference Commands
**Service Management:**
```bash
systemctl status pulse-sensor-proxy # Check service status
systemctl restart pulse-sensor-proxy # Restart service
journalctl -u pulse-sensor-proxy -f # Tail logs
```
**Key Management:**
```bash
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --dry-run # Dry-run rotation
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh # Perform rotation
/opt/pulse/scripts/pulse-sensor-proxy-rotate-keys.sh --rollback # Rollback
```
**Metrics:**
```bash
curl http://127.0.0.1:9127/metrics # Fetch all metrics
curl -s http://127.0.0.1:9127/metrics | grep pulse_proxy # Filter proxy metrics
```
**Manual RPC (Testing):**
```bash
# Using socat (inline JSON)
echo '{"correlation_id":"test","method":"get_temp","params":{"node":"pve1"}}' | \
socat - UNIX-CONNECT:/run/pulse-sensor-proxy/pulse-sensor-proxy.sock
# Using Python (proper JSON-RPC client)
python3 <<'PY'
import json, socket, uuid
payload = {
"correlation_id": str(uuid.uuid4()),
"method": "get_temp",
"params": {"node": "pve1"}
}
with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
s.connect("/run/pulse-sensor-proxy/pulse-sensor-proxy.sock")
s.sendall((json.dumps(payload) + "\n").encode())
s.shutdown(socket.SHUT_WR)
print(s.recv(65536).decode())
PY
```
**Verification:**
```bash
# Check service user
ps aux | grep pulse-sensor-proxy | grep -v grep
# Check file ownership
ls -lR /var/lib/pulse-sensor-proxy/
# Check bind mount in container
pct enter <VMID>
ls -la /run/pulse-sensor-proxy/
# Check SSH keys on nodes
for node in pve1 pve2 pve3; do
echo "=== $node ==="
ssh root@$node "grep pulse-sensor-proxy /root/.ssh/authorized_keys"
done
```
---
**Document Version:** 1.0
**Last Updated:** 2025-10-13
**Applies To:** pulse-sensor-proxy v1.0+

View file

@ -18,8 +18,8 @@ Pulse can display real-time CPU and NVMe temperatures directly in your dashboard
For **containerized deployments** (LXC/Docker), Pulse uses a secure proxy architecture:
1. **pulse-temp-proxy** runs on the Proxmox host (outside the container)
2. SSH keys are stored on the host filesystem (`/var/lib/pulse-temp-proxy/ssh/`)
1. **pulse-sensor-proxy** runs on the Proxmox host (outside the container)
2. SSH keys are stored on the host filesystem (`/var/lib/pulse-sensor-proxy/ssh/`)
3. Pulse communicates with the proxy via unix socket
4. The proxy handles all SSH connections to cluster nodes
@ -202,7 +202,7 @@ You can still manage the entry manually if you prefer, but no extra steps are re
### Secure Proxy Architecture (Current)
As of v4.24.0, containerized deployments use **pulse-temp-proxy** which eliminates the security concerns:
As of v4.24.0, containerized deployments use **pulse-sensor-proxy** which eliminates the security concerns:
- **SSH keys stored on host** - Not accessible from container
- **Unix socket communication** - Pulse never touches SSH keys
@ -294,13 +294,13 @@ To check if your deployment is using the secure proxy:
```bash
# On Proxmox host - check proxy service
systemctl status pulse-temp-proxy
systemctl status pulse-sensor-proxy
# Check if socket exists
ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock
ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock
# View proxy logs
journalctl -u pulse-temp-proxy -f
journalctl -u pulse-sensor-proxy -f
```
In the Pulse container, check the logs at startup:
@ -327,27 +327,27 @@ Temperature data will stop appearing in the dashboard after the next polling cyc
### Managing the Proxy Service
The pulse-temp-proxy service runs on the Proxmox host (outside the container).
The pulse-sensor-proxy service runs on the Proxmox host (outside the container).
**Service Management:**
```bash
# Check service status
systemctl status pulse-temp-proxy
systemctl status pulse-sensor-proxy
# Restart the proxy
systemctl restart pulse-temp-proxy
systemctl restart pulse-sensor-proxy
# Stop the proxy (disables temperature monitoring)
systemctl stop pulse-temp-proxy
systemctl stop pulse-sensor-proxy
# Start the proxy
systemctl start pulse-temp-proxy
systemctl start pulse-sensor-proxy
# Enable proxy to start on boot
systemctl enable pulse-temp-proxy
systemctl enable pulse-sensor-proxy
# Disable proxy autostart
systemctl disable pulse-temp-proxy
systemctl disable pulse-sensor-proxy
```
### Log Locations
@ -355,16 +355,16 @@ systemctl disable pulse-temp-proxy
**Proxy Logs (on Proxmox host):**
```bash
# Follow proxy logs in real-time
journalctl -u pulse-temp-proxy -f
journalctl -u pulse-sensor-proxy -f
# View last 50 lines
journalctl -u pulse-temp-proxy -n 50
journalctl -u pulse-sensor-proxy -n 50
# View logs since last boot
journalctl -u pulse-temp-proxy -b
journalctl -u pulse-sensor-proxy -b
# View logs with timestamps
journalctl -u pulse-temp-proxy --since "1 hour ago"
journalctl -u pulse-sensor-proxy --since "1 hour ago"
```
**Pulse Logs (in container):**
@ -381,12 +381,12 @@ Rotate SSH keys periodically for security (recommended every 90 days):
```bash
# 1. On Proxmox host, backup old keys
cd /var/lib/pulse-temp-proxy/ssh/
cd /var/lib/pulse-sensor-proxy/ssh/
cp id_ed25519 id_ed25519.backup
cp id_ed25519.pub id_ed25519.pub.backup
# 2. Generate new keypair
ssh-keygen -t ed25519 -f id_ed25519 -N "" -C "pulse-temp-proxy-rotated"
ssh-keygen -t ed25519 -f id_ed25519 -N "" -C "pulse-sensor-proxy-rotated"
# 3. Get the new public key
cat id_ed25519.pub
@ -398,12 +398,12 @@ ssh root@node2 "echo 'NEW_PUBLIC_KEY_HERE' >> /root/.ssh/authorized_keys"
# ... repeat for all nodes
# 5. Restart proxy to use new keys
systemctl restart pulse-temp-proxy
systemctl restart pulse-sensor-proxy
# 6. Verify temperature data still works in Pulse UI
# 7. Remove old keys from nodes (after confirming new keys work)
ssh root@node1 "sed -i '/pulse-temp-proxy-old/d' /root/.ssh/authorized_keys"
ssh root@node1 "sed -i '/pulse-sensor-proxy-old/d' /root/.ssh/authorized_keys"
```
### Revoking Access When Nodes Leave
@ -412,7 +412,7 @@ When removing a node from your cluster:
```bash
# On the node being removed, remove the proxy's public key
ssh root@old-node "sed -i '/pulse-temp-proxy/d' /root/.ssh/authorized_keys"
ssh root@old-node "sed -i '/pulse-sensor-proxy/d' /root/.ssh/authorized_keys"
# No restart needed - proxy will fail gracefully for that node
# Temperature monitoring will continue for remaining nodes
@ -422,14 +422,14 @@ ssh root@old-node "sed -i '/pulse-temp-proxy/d' /root/.ssh/authorized_keys"
**Proxy Not Running:**
- Symptom: No temperature data in Pulse UI
- Check: `systemctl status pulse-temp-proxy` on Proxmox host
- Fix: `systemctl start pulse-temp-proxy`
- Check: `systemctl status pulse-sensor-proxy` on Proxmox host
- Fix: `systemctl start pulse-sensor-proxy`
**Socket Not Accessible in Container:**
- Symptom: Pulse logs show "Temperature proxy not available - using direct SSH"
- Check: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock` in container
- Check: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock` in container
- Fix: Verify bind mount in LXC config (`/etc/pve/lxc/<CTID>.conf`)
- Should have: `lxc.mount.entry: /run/pulse-temp-proxy run/pulse-temp-proxy none bind,create=dir 0 0`
- Should have: `lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0`
**pvecm Not Available:**
- Symptom: Proxy fails to discover cluster nodes
@ -455,13 +455,13 @@ The proxy service includes systemd restart-on-failure, which handles most issues
```bash
# Check proxy health
systemctl is-active pulse-temp-proxy && echo "Proxy is running" || echo "Proxy is down"
systemctl is-active pulse-sensor-proxy && echo "Proxy is running" || echo "Proxy is down"
# Monitor logs for errors
journalctl -u pulse-temp-proxy --since "1 hour ago" | grep -i error
journalctl -u pulse-sensor-proxy --since "1 hour ago" | grep -i error
# Verify socket exists and is accessible
test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo "Socket missing"
test -S /run/pulse-sensor-proxy/pulse-sensor-proxy.sock && echo "Socket OK" || echo "Socket missing"
```
**Alerting:**
@ -474,7 +474,7 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo
### Known Limitations
**One Proxy Per Host:**
- Each Proxmox host runs one pulse-temp-proxy instance
- Each Proxmox host runs one pulse-sensor-proxy instance
- If multiple Pulse containers run on same host, they share the same proxy
- All containers see the same temperature data from the same cluster
@ -496,10 +496,10 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo
### Common Issues
**Temperature Data Stops Appearing:**
1. Check proxy service: `systemctl status pulse-temp-proxy`
2. Check proxy logs: `journalctl -u pulse-temp-proxy -n 50`
1. Check proxy service: `systemctl status pulse-sensor-proxy`
2. Check proxy logs: `journalctl -u pulse-sensor-proxy -n 50`
3. Test SSH manually: `ssh root@node "sensors -j"`
4. Verify socket exists: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock`
4. Verify socket exists: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock`
**New Cluster Node Not Showing Temperatures:**
1. Ensure lm-sensors installed: `ssh root@new-node "sensors -j"`
@ -507,14 +507,14 @@ test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock && echo "Socket OK" || echo
3. Force refresh by restarting Pulse: `pct restart <CTID>`
**Permission Denied Errors:**
1. Verify socket permissions: `ls -l /run/pulse-temp-proxy/pulse-temp-proxy.sock`
1. Verify socket permissions: `ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock`
2. Should be: `srw-rw---- 1 root root`
3. Check Pulse runs as root in container: `pct exec <CTID> -- whoami`
**Proxy Service Won't Start:**
1. Check logs: `journalctl -u pulse-temp-proxy -n 50`
2. Verify binary exists: `ls -l /usr/local/bin/pulse-temp-proxy`
3. Test manually: `/usr/local/bin/pulse-temp-proxy --version`
1. Check logs: `journalctl -u pulse-sensor-proxy -n 50`
2. Verify binary exists: `ls -l /usr/local/bin/pulse-sensor-proxy`
3. Test manually: `/usr/local/bin/pulse-sensor-proxy --version`
4. Check socket directory: `ls -ld /var/run`
### Getting Help
@ -524,9 +524,9 @@ If temperature monitoring isn't working:
1. **Collect diagnostic info:**
```bash
# On Proxmox host
systemctl status pulse-temp-proxy
journalctl -u pulse-temp-proxy -n 100 > /tmp/proxy-logs.txt
ls -la /run/pulse-temp-proxy/pulse-temp-proxy.sock
systemctl status pulse-sensor-proxy
journalctl -u pulse-sensor-proxy -n 100 > /tmp/proxy-logs.txt
ls -la /run/pulse-sensor-proxy/pulse-sensor-proxy.sock
# In Pulse container
journalctl -u pulse -n 100 | grep -i temp > /tmp/pulse-temp-logs.txt

14
go.mod
View file

@ -8,18 +8,24 @@ require (
github.com/coreos/go-oidc/v3 v3.15.0
github.com/docker/docker v28.5.1+incompatible
github.com/fsnotify/fsnotify v1.9.0
github.com/google/uuid v1.6.0
github.com/gorilla/websocket v1.5.3
github.com/joho/godotenv v1.5.1
github.com/oklog/ulid/v2 v2.1.1
github.com/prometheus/client_golang v1.23.2
github.com/rs/zerolog v1.34.0
github.com/spf13/cobra v1.9.1
golang.org/x/crypto v0.42.0
golang.org/x/oauth2 v0.31.0
golang.org/x/term v0.35.0
golang.org/x/time v0.13.0
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/Microsoft/go-winio v0.4.21 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/errdefs v1.0.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/containerd/log v0.1.0 // indirect
@ -37,10 +43,13 @@ require (
github.com/moby/sys/atomicwriter v0.1.0 // indirect
github.com/moby/term v0.5.2 // indirect
github.com/morikuni/aec v1.0.0 // indirect
github.com/oklog/ulid/v2 v2.1.1 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/spf13/pflag v1.0.7 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
@ -48,7 +57,8 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect
go.opentelemetry.io/otel/metric v1.38.0 // indirect
go.opentelemetry.io/otel/trace v1.38.0 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/sys v0.36.0 // indirect
golang.org/x/time v0.13.0 // indirect
google.golang.org/protobuf v1.36.8 // indirect
gotest.tools/v3 v3.5.2 // indirect
)

22
go.sum
View file

@ -2,8 +2,12 @@ github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEK
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Microsoft/go-winio v0.4.21 h1:+6mVbXh4wPzUrl1COX9A+ZCvEpYsOBZ6/+kwDnvLyro=
github.com/Microsoft/go-winio v0.4.21/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
@ -48,10 +52,14 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
@ -69,6 +77,8 @@ github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s=
github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@ -80,6 +90,14 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
@ -117,6 +135,10 @@ go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJr
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=

View file

@ -3230,7 +3230,7 @@ echo " • No port forwarding, X11, PTY, or agent forwarding allowed"
echo ""
echo "For containerized Pulse (LXC/Docker):"
echo " • SSH keys stored on Proxmox host (not inside container)"
echo " • pulse-temp-proxy service manages connections securely"
echo " • pulse-sensor-proxy service manages connections securely"
echo " • Container compromise does not expose SSH credentials"
echo ""
echo "For native Pulse installations:"

View file

@ -11,11 +11,11 @@ import (
)
const (
defaultSocketPath = "/run/pulse-temp-proxy/pulse-temp-proxy.sock"
defaultSocketPath = "/run/pulse-sensor-proxy/pulse-sensor-proxy.sock"
defaultTimeout = 10 * time.Second
)
// Client communicates with pulse-temp-proxy via unix socket
// Client communicates with pulse-sensor-proxy via unix socket
type Client struct {
socketPath string
timeout time.Duration
@ -23,7 +23,7 @@ type Client struct {
// NewClient creates a new proxy client
func NewClient() *Client {
socketPath := os.Getenv("PULSE_TEMP_PROXY_SOCKET")
socketPath := os.Getenv("PULSE_SENSOR_PROXY_SOCKET")
if socketPath == "" {
socketPath = defaultSocketPath
}

View file

@ -67,8 +67,8 @@ for build_name in "${!builds[@]}"; do
env $build_env go build \
-ldflags="-s -w -X main.Version=v${VERSION} -X main.BuildTime=${build_time} -X main.GitCommit=${git_commit}" \
-trimpath \
-o "$BUILD_DIR/pulse-temp-proxy-$build_name" \
./cmd/pulse-temp-proxy
-o "$BUILD_DIR/pulse-sensor-proxy-$build_name" \
./cmd/pulse-sensor-proxy
# Create release archive with proper structure
tar_name="pulse-v${VERSION}-${build_name}.tar.gz"
@ -82,7 +82,7 @@ for build_name in "${!builds[@]}"; do
# Copy binaries and VERSION file
cp "$BUILD_DIR/pulse-$build_name" "$staging_dir/bin/pulse"
cp "$BUILD_DIR/pulse-docker-agent-$build_name" "$staging_dir/bin/pulse-docker-agent"
cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$staging_dir/bin/pulse-temp-proxy"
cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$staging_dir/bin/pulse-sensor-proxy"
cp "scripts/install-docker-agent.sh" "$staging_dir/scripts/install-docker-agent.sh"
chmod 755 "$staging_dir/scripts/install-docker-agent.sh"
echo "$VERSION" > "$staging_dir/VERSION"
@ -109,7 +109,7 @@ mkdir -p "$universal_dir/scripts"
for build_name in "${!builds[@]}"; do
cp "$BUILD_DIR/pulse-$build_name" "$universal_dir/bin/pulse-${build_name}"
cp "$BUILD_DIR/pulse-docker-agent-$build_name" "$universal_dir/bin/pulse-docker-agent-${build_name}"
cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$universal_dir/bin/pulse-temp-proxy-${build_name}"
cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$universal_dir/bin/pulse-sensor-proxy-${build_name}"
done
cp "scripts/install-docker-agent.sh" "$universal_dir/scripts/install-docker-agent.sh"
@ -162,20 +162,20 @@ esac
EOF
chmod +x "$universal_dir/bin/pulse-docker-agent"
cat > "$universal_dir/bin/pulse-temp-proxy" << 'EOF'
cat > "$universal_dir/bin/pulse-sensor-proxy" << 'EOF'
#!/bin/sh
# Auto-detect architecture and run appropriate pulse-temp-proxy binary
# Auto-detect architecture and run appropriate pulse-sensor-proxy binary
ARCH=$(uname -m)
case "$ARCH" in
x86_64|amd64)
exec "$(dirname "$0")/pulse-temp-proxy-linux-amd64" "$@"
exec "$(dirname "$0")/pulse-sensor-proxy-linux-amd64" "$@"
;;
aarch64|arm64)
exec "$(dirname "$0")/pulse-temp-proxy-linux-arm64" "$@"
exec "$(dirname "$0")/pulse-sensor-proxy-linux-arm64" "$@"
;;
armv7l|armhf)
exec "$(dirname "$0")/pulse-temp-proxy-linux-armv7" "$@"
exec "$(dirname "$0")/pulse-sensor-proxy-linux-armv7" "$@"
;;
*)
echo "Unsupported architecture: $ARCH" >&2
@ -183,7 +183,7 @@ case "$ARCH" in
;;
esac
EOF
chmod +x "$universal_dir/bin/pulse-temp-proxy"
chmod +x "$universal_dir/bin/pulse-sensor-proxy"
# Add VERSION file
echo "$VERSION" > "$universal_dir/VERSION"
@ -196,16 +196,16 @@ cd ../..
# Cleanup
rm -rf "$universal_dir"
# Copy standalone pulse-temp-proxy binaries to release directory
# Copy standalone pulse-sensor-proxy binaries to release directory
# These are needed by install-temp-proxy.sh installer script
echo "Copying standalone pulse-temp-proxy binaries..."
echo "Copying standalone pulse-sensor-proxy binaries..."
for build_name in "${!builds[@]}"; do
cp "$BUILD_DIR/pulse-temp-proxy-$build_name" "$RELEASE_DIR/"
cp "$BUILD_DIR/pulse-sensor-proxy-$build_name" "$RELEASE_DIR/"
done
# Generate checksums (include tarballs and standalone binaries)
cd $RELEASE_DIR
sha256sum *.tar.gz pulse-temp-proxy-* > checksums.txt
sha256sum *.tar.gz pulse-sensor-proxy-* > checksums.txt
cd ..
echo

View file

@ -1,6 +1,6 @@
#!/bin/bash
# install-temp-proxy.sh - Installs pulse-temp-proxy on Proxmox host for secure temperature monitoring
# install-temp-proxy.sh - Installs pulse-sensor-proxy on Proxmox host for secure temperature monitoring
# This script is idempotent and can be safely re-run
set -euo pipefail
@ -67,13 +67,22 @@ if ! pct status "$CTID" >/dev/null 2>&1; then
exit 1
fi
print_info "Installing pulse-temp-proxy for container $CTID"
print_info "Installing pulse-sensor-proxy for container $CTID"
BINARY_PATH="/usr/local/bin/pulse-temp-proxy"
SERVICE_PATH="/etc/systemd/system/pulse-temp-proxy.service"
RUNTIME_DIR="/run/pulse-temp-proxy"
SOCKET_PATH="/run/pulse-temp-proxy/pulse-temp-proxy.sock"
SSH_DIR="/var/lib/pulse-temp-proxy/ssh"
BINARY_PATH="/usr/local/bin/pulse-sensor-proxy"
SERVICE_PATH="/etc/systemd/system/pulse-sensor-proxy.service"
RUNTIME_DIR="/run/pulse-sensor-proxy"
SOCKET_PATH="/run/pulse-sensor-proxy/pulse-sensor-proxy.sock"
SSH_DIR="/var/lib/pulse-sensor-proxy/ssh"
# Create dedicated service account if it doesn't exist
if ! id -u pulse-sensor-proxy >/dev/null 2>&1; then
print_info "Creating pulse-sensor-proxy service account..."
useradd --system --user-group --no-create-home --shell /usr/sbin/nologin pulse-sensor-proxy
print_info "Service account created"
else
print_info "Service account pulse-sensor-proxy already exists"
fi
# Install binary - either from local file or download from GitHub
if [[ -n "$LOCAL_BINARY" ]]; then
@ -105,13 +114,13 @@ else
ARCH=$(uname -m)
case $ARCH in
x86_64)
BINARY_NAME="pulse-temp-proxy-linux-amd64"
BINARY_NAME="pulse-sensor-proxy-linux-amd64"
;;
aarch64|arm64)
BINARY_NAME="pulse-temp-proxy-linux-arm64"
BINARY_NAME="pulse-sensor-proxy-linux-arm64"
;;
armv7l|armhf)
BINARY_NAME="pulse-temp-proxy-linux-armv7"
BINARY_NAME="pulse-sensor-proxy-linux-armv7"
;;
*)
print_error "Unsupported architecture: $ARCH"
@ -134,12 +143,19 @@ else
print_info "Binary installed to $BINARY_PATH"
fi
# Create SSH key directory
mkdir -p "$SSH_DIR"
chmod 700 "$SSH_DIR"
# Create directories with proper ownership (handles fresh installs and upgrades)
print_info "Setting up directories with proper ownership..."
install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0750 /var/lib/pulse-sensor-proxy
install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0700 "$SSH_DIR"
# Install systemd service
print_info "Installing systemd service..."
# Stop existing service if running (for upgrades)
if systemctl is-active --quiet pulse-sensor-proxy 2>/dev/null; then
print_info "Stopping existing service for upgrade..."
systemctl stop pulse-sensor-proxy
fi
# Install hardened systemd service
print_info "Installing hardened systemd service..."
cat > "$SERVICE_PATH" << 'EOF'
[Unit]
Description=Pulse Sensor Proxy
@ -148,26 +164,47 @@ After=network.target
[Service]
Type=simple
User=root
ExecStart=/usr/local/bin/pulse-temp-proxy
User=pulse-sensor-proxy
Group=pulse-sensor-proxy
WorkingDirectory=/var/lib/pulse-sensor-proxy
ExecStart=/usr/local/bin/pulse-sensor-proxy
Restart=on-failure
RestartSec=5s
# Runtime directory for socket
RuntimeDirectory=pulse-temp-proxy
# Runtime dirs/sockets
RuntimeDirectory=pulse-sensor-proxy
RuntimeDirectoryMode=0775
UMask=0007
# Security hardening
# Core hardening
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/pulse-temp-proxy
ProtectHome=read-only
ReadWritePaths=/var/lib/pulse-sensor-proxy
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
ProtectClock=true
PrivateTmp=true
PrivateDevices=true
ProtectProc=invisible
ProcSubset=pid
LockPersonality=true
RemoveIPC=true
RestrictSUIDSGID=true
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
RestrictNamespaces=true
SystemCallFilter=@system-service
SystemCallErrorNumber=EPERM
CapabilityBoundingSet=
AmbientCapabilities=
KeyringMode=private
LimitNOFILE=1024
# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=pulse-temp-proxy
SyslogIdentifier=pulse-sensor-proxy
[Install]
WantedBy=multi-user.target
@ -176,8 +213,8 @@ EOF
# Reload systemd and start service
print_info "Enabling and starting service..."
systemctl daemon-reload
systemctl enable pulse-temp-proxy.service
systemctl restart pulse-temp-proxy.service
systemctl enable pulse-sensor-proxy.service
systemctl restart pulse-sensor-proxy.service
# Wait for socket to appear
print_info "Waiting for socket..."
@ -190,7 +227,7 @@ done
if [[ ! -S "$SOCKET_PATH" ]]; then
print_error "Socket did not appear after 10 seconds"
print_info "Check service status: systemctl status pulse-temp-proxy"
print_info "Check service status: systemctl status pulse-sensor-proxy"
exit 1
fi
@ -198,15 +235,15 @@ print_info "Socket ready at $SOCKET_PATH"
# Configure LXC bind mount - mount entire directory for socket stability
LXC_CONFIG="/etc/pve/lxc/${CTID}.conf"
BIND_ENTRY="lxc.mount.entry: /run/pulse-temp-proxy run/pulse-temp-proxy none bind,create=dir 0 0"
BIND_ENTRY="lxc.mount.entry: /run/pulse-sensor-proxy run/pulse-sensor-proxy none bind,create=dir 0 0"
# Check if bind mount already exists
if grep -q "pulse-temp-proxy" "$LXC_CONFIG"; then
if grep -q "pulse-sensor-proxy" "$LXC_CONFIG"; then
print_info "Bind mount already configured in LXC config"
# Remove old socket-level bind if it exists
if grep -q "pulse-temp-proxy.sock" "$LXC_CONFIG"; then
if grep -q "pulse-sensor-proxy.sock" "$LXC_CONFIG"; then
print_info "Upgrading from socket-level to directory-level bind mount..."
sed -i '/pulse-temp-proxy\.sock/d' "$LXC_CONFIG"
sed -i '/pulse-sensor-proxy\.sock/d' "$LXC_CONFIG"
echo "$BIND_ENTRY" >> "$LXC_CONFIG"
NEEDS_RESTART=true
fi
@ -227,7 +264,7 @@ fi
# Verify socket is accessible in container
print_info "Verifying socket accessibility..."
if pct exec "$CTID" -- test -S /run/pulse-temp-proxy/pulse-temp-proxy.sock; then
if pct exec "$CTID" -- test -S /run/pulse-sensor-proxy/pulse-sensor-proxy.sock; then
print_info "Socket is accessible in container"
else
print_warn "Socket is not yet accessible in container"
@ -236,11 +273,11 @@ fi
# Test proxy status
print_info "Testing proxy status..."
if systemctl is-active --quiet pulse-temp-proxy; then
print_info "${GREEN}${NC} pulse-temp-proxy is running"
if systemctl is-active --quiet pulse-sensor-proxy; then
print_info "${GREEN}${NC} pulse-sensor-proxy is running"
else
print_error "pulse-temp-proxy is not running"
print_info "Check logs: journalctl -u pulse-temp-proxy -n 50"
print_error "pulse-sensor-proxy is not running"
print_info "Check logs: journalctl -u pulse-sensor-proxy -n 50"
exit 1
fi
@ -255,7 +292,7 @@ print_info " 2. Go to Settings → Enable Temperature Monitoring"
print_info " 3. The proxy will automatically discover and configure cluster nodes"
print_info ""
print_info "To check proxy status:"
print_info " systemctl status pulse-temp-proxy"
print_info " journalctl -u pulse-temp-proxy -f"
print_info " systemctl status pulse-sensor-proxy"
print_info " journalctl -u pulse-sensor-proxy -f"
exit 0

View file

@ -0,0 +1,314 @@
#!/usr/bin/env bash
# pulse-proxy-rotate-keys.sh
# Rotate pulse-sensor-proxy SSH keys with staging, verification, and rollback support.
set -euo pipefail
BASE_DIR="/var/lib/pulse-sensor-proxy"
ACTIVE_DIR="${BASE_DIR}/ssh"
POOL_DIR="${BASE_DIR}/ssh.d"
STAGING_DIR="${POOL_DIR}/next"
BACKUP_DIR="${POOL_DIR}/prev"
SOCKET_PATH="/run/pulse-sensor-proxy/pulse-sensor-proxy.sock"
SCRIPT_TAG="pulse-proxy-rotate"
SSH_KEY_TYPE="ed25519"
SSH_KEY_COMMENT="pulse-sensor-proxy"
SSH_KEY_FILE="id_${SSH_KEY_TYPE}"
dry_run=false
do_rollback=false
# usage prints the command-line synopsis, options, and examples to stdout.
usage() {
cat <<'EOF'
Usage: pulse-proxy-rotate-keys.sh [--dry-run] [--rollback]
Options:
  --dry-run    Walk through all steps without modifying state or contacting nodes.
  --rollback   Restore the previously active keypair (requires ssh.d/prev).
  -h, --help   Show this help.
Examples:
  ./pulse-proxy-rotate-keys.sh --dry-run
  ./pulse-proxy-rotate-keys.sh
  ./pulse-proxy-rotate-keys.sh --rollback
EOF
}
# Logging helpers: each message is mirrored to syslog (tagged pulse-proxy-rotate)
# and echoed to the console. Note log_error sends its console copy to stderr,
# while log_info/log_warn write to stdout.
log_info() { logger -t "${SCRIPT_TAG}" "INFO: $*"; printf '[INFO] %s\n' "$*"; }
log_warn() { logger -t "${SCRIPT_TAG}" "WARN: $*"; printf '[WARN] %s\n' "$*"; }
log_error() { logger -t "${SCRIPT_TAG}" "ERROR: $*"; printf '[ERROR] %s\n' "$*" >&2; }
# Abort immediately unless running with root privileges (key files and
# chown/chmod operations below require it).
require_root() {
    if [[ "$EUID" -ne 0 ]]; then
        log_error "This script must be run as root."
        exit 1
    fi
}
# Verify every external command this script actually invokes is available,
# aborting with a single consolidated error otherwise.
# Fix: the previous list also demanded socat and stat, which are never used
# (the RPC path talks to the socket via python3 directly), causing spurious
# failures on hosts without them.
require_cmds() {
    local missing=()
    local cmd
    for cmd in ssh-keygen ssh jq python3 mkdir; do
        if ! command -v "$cmd" >/dev/null 2>&1; then
            missing+=("$cmd")
        fi
    done
    if ((${#missing[@]} > 0)); then
        log_error "Missing required commands: ${missing[*]}"
        exit 1
    fi
}
# Parse CLI flags into the global dry_run / do_rollback booleans.
# The two modes are mutually exclusive.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --dry-run)
                dry_run=true
                ;;
            --rollback)
                do_rollback=true
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                usage
                exit 1
                ;;
        esac
        shift
    done
    if [[ "$dry_run" == true && "$do_rollback" == true ]]; then
        log_error "Cannot combine --dry-run and --rollback."
        exit 1
    fi
}
# Fail fast when the proxy's unix socket is absent — every RPC below
# depends on a running pulse-sensor-proxy service.
ensure_socket() {
    if [[ ! -S "$SOCKET_PATH" ]]; then
        log_error "Proxy socket not found at $SOCKET_PATH. Is pulse-sensor-proxy running?"
        exit 1
    fi
}
# Execute a command, or merely log it when --dry-run is active.
# NOTE(review): currently unreferenced by the workflow; kept as a helper.
run_cmd() {
    if [[ "$dry_run" == true ]]; then
        log_info "[dry-run] $*"
        return 0
    fi
    "$@"
}
# json_rpc METHOD [PARAMS_JSON]
# Sends one newline-terminated JSON request over the proxy unix socket and
# prints the raw JSON response on stdout.
# Callers capture the output with command substitution, so stdout must carry
# ONLY JSON. Fix: in --dry-run mode the log_info line previously went to
# stdout too, corrupting the captured response and making jq fail in
# require_success (dry runs always aborted). The diagnostic now goes to stderr.
json_rpc() {
    local method=$1
    local params_json=${2:-"{}"}
    local response
    if $dry_run; then
        # stderr only — keep the captured stdout pure JSON.
        log_info "[dry-run] would call RPC ${method} with params ${params_json}" >&2
        printf '{"success":true,"data":{}}'
        return 0
    fi
    response=$(SOCKET="$SOCKET_PATH" METHOD="$method" PARAMS="$params_json" python3 - <<'PY'
import json
import os
import socket
import sys
import uuid

sock_path = os.environ["SOCKET"]
method = os.environ["METHOD"]
params = json.loads(os.environ["PARAMS"]) if os.environ["PARAMS"] else {}
payload = {
    "correlation_id": str(uuid.uuid4()),
    "method": method,
    "params": params,
}
data = (json.dumps(payload) + "\n").encode()
with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
    sock.connect(sock_path)
    sock.sendall(data)
    # Half-close so the server sees EOF and replies.
    sock.shutdown(socket.SHUT_WR)
    chunks = []
    while True:
        chunk = sock.recv(65536)
        if not chunk:
            break
        chunks.append(chunk)
sys.stdout.write(b"".join(chunks).decode())
PY
) || {
        log_error "RPC '${method}' failed to execute."
        exit 1
    }
    echo "$response"
}
# require_success RESPONSE METHOD
# Exit with a logged error unless the captured RPC response reports success.
require_success() {
    local resp=$1
    local method=$2
    local ok err
    ok=$(jq -r '.success // false' <<<"$resp")
    if [[ "$ok" == "true" ]]; then
        return 0
    fi
    err=$(jq -r '.error // empty' <<<"$resp")
    log_error "RPC '${method}' failed: ${err:-unknown error}"
    exit 1
}
# Ensure the base, pool, and staging directories exist with ownership matching
# the service account the systemd unit runs as.
# Fix: ownership previously used "pulse-proxy", an account that does not exist;
# the installer creates "pulse-sensor-proxy" (and the unit sets User= to it),
# so the chown failed under set -e on every real run.
prepare_dirs() {
    local dir
    for dir in "$BASE_DIR" "$POOL_DIR" "$STAGING_DIR"; do
        if $dry_run; then
            log_info "[dry-run] ensure directory $dir owned by pulse-sensor-proxy:pulse-sensor-proxy"
            continue
        fi
        mkdir -p "$dir"
        chown pulse-sensor-proxy:pulse-sensor-proxy "$dir"
        chmod 0750 "$dir"
    done
}
# Recreate the staging directory empty so a fresh keypair can be generated.
# Fix: chown previously targeted the nonexistent "pulse-proxy" account; the
# service account is "pulse-sensor-proxy".
clean_staging() {
    [[ -d "$STAGING_DIR" ]] || return 0
    if $dry_run; then
        log_info "[dry-run] would remove existing staging directory $STAGING_DIR"
        return 0
    fi
    rm -rf "$STAGING_DIR"
    mkdir -p "$STAGING_DIR"
    chown pulse-sensor-proxy:pulse-sensor-proxy "$STAGING_DIR"
    chmod 0750 "$STAGING_DIR"
}
# Generate a fresh ed25519 keypair in the staging directory, with a timestamped
# comment so rotations are identifiable in authorized_keys.
# Fix: chown previously targeted the nonexistent "pulse-proxy" account; the
# service account is "pulse-sensor-proxy".
generate_keypair() {
    local key_path="$STAGING_DIR/${SSH_KEY_FILE}"
    if $dry_run; then
        log_info "[dry-run] would generate new ${SSH_KEY_TYPE} keypair at $key_path"
        return
    fi
    clean_staging
    log_info "Generating new ${SSH_KEY_TYPE} keypair in staging..."
    ssh-keygen -t "$SSH_KEY_TYPE" -N '' -C "$SSH_KEY_COMMENT rotation $(date -u +%Y%m%dT%H%M%SZ)" -f "$key_path" >/dev/null
    chown pulse-sensor-proxy:pulse-sensor-proxy "$key_path" "${key_path}.pub"
    chmod 0600 "$key_path"
    chmod 0640 "${key_path}.pub"
}
# ensure_cluster_keys KEY_DIR
# Ask the proxy (via RPC) to distribute the public key found in KEY_DIR to
# every cluster node; aborts on RPC failure.
ensure_cluster_keys() {
    local key_dir=$1
    local payload resp
    payload=$(jq -cn --arg dir "$key_dir" '{key_dir: $dir}')
    resp=$(json_rpc "ensure_cluster_keys" "$payload")
    require_success "$resp" "ensure_cluster_keys"
    log_info "Proxy reported successful key distribution."
}
# Print the deduplicated, sorted list of cluster node names reported by the
# proxy's register_nodes RPC, one per line.
list_nodes() {
    local resp
    resp=$(json_rpc "register_nodes")
    require_success "$resp" "register_nodes"
    jq -r '.data.nodes[]?.name // empty' <<<"$resp" | sort -u
}
# verify_nodes KEY_FILE
# Confirm the staged private key grants SSH access (running `sensors -j`) on
# every cluster node; aborts listing any nodes that failed.
# Fix: dropped the unused `local rc` declaration.
verify_nodes() {
    local key_file=$1
    local -a bad_nodes=()
    local node
    while read -r node; do
        [[ -z "$node" ]] && continue
        log_info "Verifying SSH access on ${node}..."
        if $dry_run; then
            log_info "[dry-run] would run ssh -i $key_file root@${node} sensors -j"
            continue
        fi
        # NOTE(review): StrictHostKeyChecking=no skips host-key verification;
        # consider accept-new once nodes are pre-seeded in known_hosts.
        if ssh -i "$key_file" -o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@${node}" "sensors -j" >/dev/null 2>&1; then
            log_info "Verification succeeded for ${node}."
        else
            log_warn "Verification failed for ${node}."
            bad_nodes+=("$node")
        fi
    done < <(list_nodes)
    if ((${#bad_nodes[@]} > 0)); then
        log_error "Verification failed for: ${bad_nodes[*]}"
        exit 1
    fi
}
# Atomically-ish promote the staged keypair: archive any old backup, move the
# active dir to backup, then staging to active, and restore strict permissions.
# Fix: chown -R previously targeted the nonexistent "pulse-proxy" account; the
# service account is "pulse-sensor-proxy".
swap_keys() {
    local timestamp
    timestamp=$(date -u +%Y%m%dT%H%M%SZ)
    if $dry_run; then
        log_info "[dry-run] would rotate directories:"
        log_info "[dry-run] mv ${BACKUP_DIR} ${POOL_DIR}/prev.${timestamp} (if exists)"
        log_info "[dry-run] mv ${ACTIVE_DIR} ${BACKUP_DIR}"
        log_info "[dry-run] mv ${STAGING_DIR} ${ACTIVE_DIR}"
        return
    fi
    log_info "Activating new keypair..."
    if [[ -d "$BACKUP_DIR" ]]; then
        # Keep the previous backup under a timestamped name rather than deleting it.
        mv "$BACKUP_DIR" "${POOL_DIR}/prev.${timestamp}"
    fi
    mv "$ACTIVE_DIR" "$BACKUP_DIR"
    mv "$STAGING_DIR" "$ACTIVE_DIR"
    chown -R pulse-sensor-proxy:pulse-sensor-proxy "$ACTIVE_DIR" "$BACKUP_DIR"
    chmod 0750 "$ACTIVE_DIR" "$BACKUP_DIR"
    chmod 0600 "$ACTIVE_DIR/${SSH_KEY_FILE}"
    chmod 0640 "$ACTIVE_DIR/${SSH_KEY_FILE}.pub"
    log_info "Key rotation complete. Previous keys stored at ${BACKUP_DIR}."
}
# Restore the previously active keypair from ssh.d/prev, preserving the failed
# keys under a timestamped directory, then re-push the restored public key to
# all cluster nodes so access is consistent again.
# Fix: chown -R previously targeted the nonexistent "pulse-proxy" account; the
# service account is "pulse-sensor-proxy".
rollback_keys() {
    if [[ ! -d "$BACKUP_DIR" ]]; then
        log_error "No backup directory (${BACKUP_DIR}) present. Cannot rollback."
        exit 1
    fi
    local timestamp
    timestamp=$(date -u +%Y%m%dT%H%M%SZ)
    if $dry_run; then
        log_info "[dry-run] would rollback by swapping ${ACTIVE_DIR} with ${BACKUP_DIR}"
        return
    fi
    log_warn "Rolling back to previous keypair..."
    local failed_dir="${POOL_DIR}/failed.${timestamp}"
    if [[ -d "$ACTIVE_DIR" ]]; then
        mv "$ACTIVE_DIR" "$failed_dir"
    fi
    mv "$BACKUP_DIR" "$ACTIVE_DIR"
    chown -R pulse-sensor-proxy:pulse-sensor-proxy "$ACTIVE_DIR"
    chmod 0600 "$ACTIVE_DIR/${SSH_KEY_FILE}"
    chmod 0640 "$ACTIVE_DIR/${SSH_KEY_FILE}.pub"
    log_info "Rollback complete. Old keys preserved at ${failed_dir}."
    log_info "Re-pushing restored keypair to cluster nodes..."
    ensure_cluster_keys "$ACTIVE_DIR"
}
# Entry point: parse flags, then either roll back to the previous keypair or
# run the full rotate → distribute → verify → swap workflow.
main() {
    parse_args "$@"
    require_root
    require_cmds

    if $do_rollback; then
        ensure_socket
        rollback_keys
        return
    fi

    prepare_dirs
    ensure_socket
    generate_keypair

    local staging_key="${STAGING_DIR}/${SSH_KEY_FILE}"
    # Outside dry-run, the staged private key must exist before distribution.
    if ! $dry_run && [[ ! -f "$staging_key" ]]; then
        log_error "Staged private key missing at ${staging_key}"
        exit 1
    fi

    ensure_cluster_keys "$STAGING_DIR"
    verify_nodes "$staging_key"
    swap_keys
    log_info "Rotation workflow finished successfully."
}
main "$@"

View file

@ -0,0 +1,51 @@
[Unit]
Description=Pulse Sensor Proxy
Documentation=https://github.com/rcourtman/Pulse
After=network.target

[Service]
Type=simple
# Dedicated unprivileged service account (created by the installer script).
User=pulse-sensor-proxy
Group=pulse-sensor-proxy
WorkingDirectory=/var/lib/pulse-sensor-proxy
ExecStart=/usr/local/bin/pulse-sensor-proxy
Restart=on-failure
RestartSec=5s

# Runtime dirs/sockets
# systemd creates /run/pulse-sensor-proxy for the unix socket. The group-
# accessible mode together with UMask=0007 lets the bind-mounted LXC
# container reach the socket.
RuntimeDirectory=pulse-sensor-proxy
RuntimeDirectoryMode=0775
UMask=0007

# Core hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=read-only
# Only the SSH key store stays writable; everything else is read-only.
ReadWritePaths=/var/lib/pulse-sensor-proxy
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
ProtectClock=true
PrivateTmp=true
PrivateDevices=true
ProtectProc=invisible
ProcSubset=pid
LockPersonality=true
RemoveIPC=true
RestrictSUIDSGID=true
# AF_UNIX for the local RPC socket; AF_INET/AF_INET6 for outbound SSH to nodes.
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
RestrictNamespaces=true
SystemCallFilter=@system-service
SystemCallErrorNumber=EPERM
# Drop every capability — the proxy needs none.
CapabilityBoundingSet=
AmbientCapabilities=
KeyringMode=private
LimitNOFILE=1024

# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=pulse-sensor-proxy

[Install]
WantedBy=multi-user.target

View file

@ -1,26 +0,0 @@
[Unit]
Description=Pulse Temperature Proxy
Documentation=https://github.com/rcourtman/Pulse
After=network.target
[Service]
Type=simple
User=root
ExecStart=/usr/local/bin/pulse-temp-proxy
Restart=on-failure
RestartSec=5s
# Security hardening
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/pulse-temp-proxy /var/run
# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=pulse-temp-proxy
[Install]
WantedBy=multi-user.target