mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 19:41:17 +00:00
Implements comprehensive node validation system to prevent SSRF attacks
via the temperature proxy. Addresses critical vulnerability where proxy
would SSH to any hostname/IP passing format validation.
Features:
- Configurable allowed_nodes list (hostnames, IPs, CIDR ranges)
- Automatic Proxmox cluster membership validation
- 5-minute cluster membership cache to reduce pvecm overhead
- strict_node_validation option for strict vs permissive modes
- New metric: pulse_proxy_node_validation_failures_total{reason}
- Logs blocked attempts at WARN level with 'potential SSRF attempt'
Configuration:
- allowed_nodes: [] (empty = auto-discover from cluster)
- strict_node_validation: true (require cluster membership)
Default behavior: Empty allowlist + Proxmox host = validate cluster
members (secure by default, backwards compatible).
Related to security audit 2025-11-07.
Co-authored-by: Codex <codex@openai.com>
321 lines
7.7 KiB
Go
321 lines
7.7 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"net"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// defaultMetricsAddr is the listen address Start substitutes when it is asked
// for the "default" address; it binds to the loopback interface only.
const defaultMetricsAddr = "127.0.0.1:9127"
|
|
|
|
// ProxyMetrics holds Prometheus metrics for the proxy
|
|
type ProxyMetrics struct {
|
|
rpcRequests *prometheus.CounterVec
|
|
rpcLatency *prometheus.HistogramVec
|
|
sshRequests *prometheus.CounterVec
|
|
sshLatency *prometheus.HistogramVec
|
|
queueDepth prometheus.Gauge
|
|
rateLimitHits prometheus.Counter
|
|
limiterRejects *prometheus.CounterVec
|
|
globalConcurrency prometheus.Gauge
|
|
limiterPenalties *prometheus.CounterVec
|
|
limiterPeers prometheus.Gauge
|
|
nodeValidationFailures *prometheus.CounterVec
|
|
readTimeouts prometheus.Counter
|
|
writeTimeouts prometheus.Counter
|
|
hostKeyChanges *prometheus.CounterVec
|
|
sshOutputOversized *prometheus.CounterVec
|
|
buildInfo *prometheus.GaugeVec
|
|
server *http.Server
|
|
registry *prometheus.Registry
|
|
}
|
|
|
|
// NewProxyMetrics creates and registers all metrics
|
|
func NewProxyMetrics(version string) *ProxyMetrics {
|
|
reg := prometheus.NewRegistry()
|
|
|
|
pm := &ProxyMetrics{
|
|
rpcRequests: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_rpc_requests_total",
|
|
Help: "Total RPC requests handled by method and result.",
|
|
},
|
|
[]string{"method", "result"},
|
|
),
|
|
rpcLatency: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "pulse_proxy_rpc_latency_seconds",
|
|
Help: "RPC handler latency.",
|
|
Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2.5, 5},
|
|
},
|
|
[]string{"method"},
|
|
),
|
|
sshRequests: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_ssh_requests_total",
|
|
Help: "SSH command executions by node and result.",
|
|
},
|
|
[]string{"node", "result"},
|
|
),
|
|
sshLatency: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "pulse_proxy_ssh_latency_seconds",
|
|
Help: "SSH command latency per node.",
|
|
Buckets: []float64{0.1, 0.5, 1, 2.5, 5, 10, 30},
|
|
},
|
|
[]string{"node"},
|
|
),
|
|
queueDepth: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "pulse_proxy_queue_depth",
|
|
Help: "Concurrent RPC requests being processed.",
|
|
},
|
|
),
|
|
rateLimitHits: prometheus.NewCounter(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_rate_limit_hits_total",
|
|
Help: "Number of RPC requests rejected due to rate limiting.",
|
|
},
|
|
),
|
|
limiterRejects: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_limiter_rejections_total",
|
|
Help: "Limiter rejections by reason.",
|
|
},
|
|
[]string{"reason", "peer"},
|
|
),
|
|
globalConcurrency: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "pulse_proxy_global_concurrency_inflight",
|
|
Help: "Current global concurrency slots in use.",
|
|
},
|
|
),
|
|
limiterPenalties: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_limiter_penalties_total",
|
|
Help: "Penalty sleeps applied after validation failures.",
|
|
},
|
|
[]string{"reason", "peer"},
|
|
),
|
|
limiterPeers: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "pulse_proxy_limiter_active_peers",
|
|
Help: "Number of peers tracked by the rate limiter.",
|
|
},
|
|
),
|
|
nodeValidationFailures: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_node_validation_failures_total",
|
|
Help: "Node validation failures by reason.",
|
|
},
|
|
[]string{"reason"},
|
|
),
|
|
readTimeouts: prometheus.NewCounter(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_read_timeouts_total",
|
|
Help: "Number of socket read timeouts.",
|
|
},
|
|
),
|
|
writeTimeouts: prometheus.NewCounter(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_write_timeouts_total",
|
|
Help: "Number of socket write timeouts.",
|
|
},
|
|
),
|
|
hostKeyChanges: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_hostkey_changes_total",
|
|
Help: "Detected SSH host key changes by node.",
|
|
},
|
|
[]string{"node"},
|
|
),
|
|
sshOutputOversized: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "pulse_proxy_ssh_output_oversized_total",
|
|
Help: "Number of SSH responses rejected for exceeding size limits.",
|
|
},
|
|
[]string{"node"},
|
|
),
|
|
buildInfo: prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pulse_proxy_build_info",
|
|
Help: "Proxy build metadata.",
|
|
},
|
|
[]string{"version"},
|
|
),
|
|
registry: reg,
|
|
}
|
|
|
|
reg.MustRegister(
|
|
pm.rpcRequests,
|
|
pm.rpcLatency,
|
|
pm.sshRequests,
|
|
pm.sshLatency,
|
|
pm.queueDepth,
|
|
pm.rateLimitHits,
|
|
pm.limiterRejects,
|
|
pm.globalConcurrency,
|
|
pm.limiterPenalties,
|
|
pm.limiterPeers,
|
|
pm.nodeValidationFailures,
|
|
pm.readTimeouts,
|
|
pm.writeTimeouts,
|
|
pm.hostKeyChanges,
|
|
pm.sshOutputOversized,
|
|
pm.buildInfo,
|
|
)
|
|
|
|
pm.buildInfo.WithLabelValues(version).Set(1)
|
|
|
|
return pm
|
|
}
|
|
|
|
// Start starts the metrics HTTP server on the specified address
|
|
func (m *ProxyMetrics) Start(addr string) error {
|
|
if addr == "" || strings.ToLower(addr) == "disabled" {
|
|
log.Info().Msg("Metrics server disabled")
|
|
return nil
|
|
}
|
|
|
|
if addr == "default" {
|
|
addr = defaultMetricsAddr
|
|
}
|
|
|
|
mux := http.NewServeMux()
|
|
mux.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))
|
|
|
|
ln, err := net.Listen("tcp", addr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
m.server = &http.Server{
|
|
Addr: addr,
|
|
Handler: mux,
|
|
ReadHeaderTimeout: 5 * time.Second,
|
|
}
|
|
|
|
go func() {
|
|
if err := m.server.Serve(ln); err != nil && err != http.ErrServerClosed {
|
|
log.Error().Err(err).Str("addr", addr).Msg("Metrics server stopped unexpectedly")
|
|
}
|
|
}()
|
|
|
|
log.Info().Str("addr", addr).Msg("Metrics server started")
|
|
return nil
|
|
}
|
|
|
|
// Shutdown gracefully shuts down the metrics server
|
|
func (m *ProxyMetrics) Shutdown(ctx context.Context) {
|
|
if m.server != nil {
|
|
_ = m.server.Shutdown(ctx)
|
|
}
|
|
}
|
|
|
|
// sanitizeNodeLabel converts a node name into a safe Prometheus label value.
//
// ASCII letters are lower-cased; ASCII digits, '-', '_' and '.' are kept; every
// other rune (including each invalid UTF-8 byte) becomes a single '_'. The
// result is capped at 63 bytes, and an empty input yields "unknown".
//
// Lower-casing is deliberately ASCII-only: Unicode case mapping would turn
// runes such as U+212A (KELVIN SIGN) into plain ASCII letters and let them
// through as if they were valid hostname characters. Work and memory are
// bounded by the 63-byte cap rather than by the length of the input.
func sanitizeNodeLabel(node string) string {
	const maxLen = 63

	if node == "" {
		return "unknown"
	}

	capacity := len(node)
	if capacity > maxLen {
		capacity = maxLen
	}

	var safe strings.Builder
	safe.Grow(capacity)

	for _, r := range node {
		// Each input rune yields exactly one output byte, so stopping here is
		// the same as truncating the fully sanitized string to maxLen.
		if safe.Len() == maxLen {
			break
		}
		switch {
		case r >= 'A' && r <= 'Z':
			safe.WriteByte(byte(r - 'A' + 'a'))
		case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' || r == '.':
			safe.WriteByte(byte(r))
		default:
			safe.WriteByte('_')
		}
	}

	return safe.String()
}
|
|
|
|
func (m *ProxyMetrics) recordLimiterReject(reason, peer string) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.rateLimitHits.Inc()
|
|
m.limiterRejects.WithLabelValues(reason, peer).Inc()
|
|
}
|
|
|
|
func (m *ProxyMetrics) recordNodeValidationFailure(reason string) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.nodeValidationFailures.WithLabelValues(reason).Inc()
|
|
}
|
|
|
|
func (m *ProxyMetrics) recordReadTimeout() {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.readTimeouts.Inc()
|
|
}
|
|
|
|
func (m *ProxyMetrics) recordWriteTimeout() {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.writeTimeouts.Inc()
|
|
}
|
|
|
|
func (m *ProxyMetrics) recordSSHOutputOversized(node string) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
if node == "" {
|
|
node = "unknown"
|
|
}
|
|
m.sshOutputOversized.WithLabelValues(sanitizeNodeLabel(node)).Inc()
|
|
}
|
|
|
|
func (m *ProxyMetrics) recordHostKeyChange(node string) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
if node == "" {
|
|
node = "unknown"
|
|
}
|
|
m.hostKeyChanges.WithLabelValues(sanitizeNodeLabel(node)).Inc()
|
|
}
|
|
|
|
func (m *ProxyMetrics) incGlobalConcurrency() {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.globalConcurrency.Inc()
|
|
}
|
|
|
|
func (m *ProxyMetrics) decGlobalConcurrency() {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.globalConcurrency.Dec()
|
|
}
|
|
|
|
func (m *ProxyMetrics) recordPenalty(reason, peer string) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.limiterPenalties.WithLabelValues(reason, peer).Inc()
|
|
}
|
|
|
|
func (m *ProxyMetrics) setLimiterPeers(count int) {
|
|
if m == nil {
|
|
return
|
|
}
|
|
m.limiterPeers.Set(float64(count))
|
|
}
|