feat: add comprehensive node cleanup system

Implements an automated cleanup workflow that runs when nodes are deleted from Pulse, removing the entire monitoring footprint from the host. Changes include a new RPC handler in the sensor proxy for cleanup requests, an enhanced node deletion modal with detailed cleanup explanations, and improved SSH key management with proper tagging for atomic updates.
This commit is contained in:
rcourtman 2025-10-17 18:53:45 +00:00
parent d0f7fd6404
commit 123e0f04ca
8 changed files with 487 additions and 130 deletions

View file

@ -0,0 +1,88 @@
package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"time"
"github.com/rs/zerolog"
)
// cleanupRequestFilename is the base name of the file that carries a pending
// cleanup request; it is joined onto the proxy working directory to form the
// full request path.
const cleanupRequestFilename = "cleanup-request.json"
// cleanupRequestPath returns the full path of the cleanup request file.
//
// The proxy's own working directory is preferred; when it is unset the value
// of defaultWorkDir is used instead. An error is returned only if neither
// yields a non-empty directory.
func (p *Proxy) cleanupRequestPath() (string, error) {
	base := p.workDir
	if base == "" {
		// No directory configured on the proxy: fall back to the default.
		if base = defaultWorkDir(); base == "" {
			return "", errors.New("cleanup working directory not configured")
		}
	}
	return filepath.Join(base, cleanupRequestFilename), nil
}
// handleRequestCleanup records a cleanup request by writing a small JSON
// payload to the cleanup request file in the proxy working directory.
//
// The payload always contains "requestedAt" (current time, UTC, RFC 3339).
// "host" and "reason" are copied from req.Params when they are present as
// non-empty strings. The payload is written to a temporary file in the same
// directory, flushed to stable storage, and then renamed over the final path,
// so a reader never observes a partially written request.
//
// ctx is accepted to satisfy the handler signature and is not consulted.
//
// On success the result is {"queued": true}. On failure the error is wrapped
// with the step that failed and the temporary file is removed.
func (p *Proxy) handleRequestCleanup(ctx context.Context, req *RPCRequest, logger zerolog.Logger) (interface{}, error) {
	cleanupPath, err := p.cleanupRequestPath()
	if err != nil {
		return nil, err
	}

	dir := filepath.Dir(cleanupPath)
	if err := os.MkdirAll(dir, 0o750); err != nil {
		return nil, fmt.Errorf("ensure cleanup directory: %w", err)
	}

	payload := map[string]any{
		"requestedAt": time.Now().UTC().Format(time.RFC3339),
	}
	if req != nil && req.Params != nil {
		if host, ok := req.Params["host"].(string); ok && host != "" {
			payload["host"] = host
		}
		if reason, ok := req.Params["reason"].(string); ok && reason != "" {
			payload["reason"] = reason
		}
	}

	data, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("encode cleanup payload: %w", err)
	}

	// Stage the payload next to the destination so the final rename stays
	// within one directory.
	tmpFile, err := os.CreateTemp(dir, "cleanup-request-*.tmp")
	if err != nil {
		return nil, fmt.Errorf("prepare cleanup signal: %w", err)
	}
	tmpName := tmpFile.Name()

	// Until the rename succeeds the staged file is garbage; remove it on every
	// early return from here on.
	activated := false
	defer func() {
		if !activated {
			_ = os.Remove(tmpName) // best effort; the original error is what matters
		}
	}()

	if _, err := tmpFile.Write(append(data, '\n')); err != nil {
		_ = tmpFile.Close() // already failing; report the write error
		return nil, fmt.Errorf("write cleanup payload: %w", err)
	}

	// Permissions are tightened on a best-effort basis: a failure is logged
	// but does not abort the request.
	if err := tmpFile.Chmod(0o600); err != nil {
		logger.Warn().Err(err).Str("path", tmpName).Msg("Failed to set cleanup payload permissions")
	}

	// Flush file contents before the rename so a crash shortly afterwards
	// cannot leave an empty or truncated request file at the final path.
	if err := tmpFile.Sync(); err != nil {
		_ = tmpFile.Close() // already failing; report the sync error
		return nil, fmt.Errorf("sync cleanup payload: %w", err)
	}

	if err := tmpFile.Close(); err != nil {
		return nil, fmt.Errorf("close cleanup payload: %w", err)
	}

	// Replace any existing request atomically so systemd path units trigger on change.
	if err := os.Rename(tmpName, cleanupPath); err != nil {
		return nil, fmt.Errorf("activate cleanup payload: %w", err)
	}
	activated = true

	logger.Info().
		Str("path", cleanupPath).
		Interface("payload", payload).
		Msg("Cleanup request signalled")

	return map[string]any{"queued": true}, nil
}

View file

@ -35,6 +35,10 @@ const (
maxRequestBytes = 16 * 1024 // 16 KiB max request size
)
// defaultWorkDir returns the fallback working directory for the proxy, used
// when no working directory has been determined.
func defaultWorkDir() string {
	const fallbackDir = "/var/lib/pulse-sensor-proxy"
	return fallbackDir
}
var rootCmd = &cobra.Command{
Use: "pulse-sensor-proxy",
Short: "Pulse Sensor Proxy - Secure sensor data bridge for containerized Pulse",
@ -74,6 +78,7 @@ func main() {
type Proxy struct {
socketPath string
sshKeyPath string
workDir string
listener net.Listener
rateLimiter *rateLimiter
nodeGate *nodeGate
@ -93,6 +98,7 @@ const (
RPCRegisterNodes = "register_nodes"
RPCGetTemperature = "get_temperature"
RPCGetStatus = "get_status"
RPCRequestCleanup = "request_cleanup"
)
// RPCRequest represents a request from Pulse
@ -158,12 +164,20 @@ func runProxy() {
metrics: metrics,
}
if wd, err := os.Getwd(); err == nil {
proxy.workDir = wd
} else {
log.Warn().Err(err).Msg("Failed to determine working directory; using default")
proxy.workDir = defaultWorkDir()
}
// Register RPC method handlers
proxy.router = map[string]handlerFunc{
RPCGetStatus: proxy.handleGetStatusV2,
RPCEnsureClusterKeys: proxy.handleEnsureClusterKeysV2,
RPCRegisterNodes: proxy.handleRegisterNodesV2,
RPCGetTemperature: proxy.handleGetTemperatureV2,
RPCRequestCleanup: proxy.handleRequestCleanup,
}
if err := proxy.initAuthRules(); err != nil {

View file

@ -136,7 +136,7 @@ func (p *Proxy) testSSHConnection(nodeHost string) error {
privKeyPath := filepath.Join(p.sshKeyPath, "id_ed25519")
cmd := fmt.Sprintf(
`ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s "echo test"`,
`ssh -i %[1]s -T -n -o BatchMode=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ConnectTimeout=5 root@%[2]s "echo test"`,
privKeyPath,
nodeHost,
)
@ -166,7 +166,7 @@ func (p *Proxy) getTemperatureViaSSH(nodeHost string) (string, error) {
// Since we use ForceCommand="sensors -j", any SSH command will run sensors
// We don't need to specify the command
cmd := fmt.Sprintf(
`ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@%s ""`,
`ssh -i %[1]s -T -n -o BatchMode=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ConnectTimeout=5 root@%[2]s ""`,
privKeyPath,
nodeHost,
)