From 4e192bb6d6e537f43422bb0a61bd43d42e733910 Mon Sep 17 00:00:00 2001 From: ruvnet Date: Sun, 3 May 2026 18:36:37 -0400 Subject: [PATCH] sec(hailo): max_encoding_message_size cap + session test sweep (iter 190) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defense-in-depth response cap on the gRPC server. iter-180 capped the decode side at 64 KB; the encode side was uncapped (tonic default usize::MAX) even though the worker only ever generates a 384-element embedding vector (≈ 1.6 KB) per unary embed. Cap at 16 KB (10× the legitimate per-message size) so that any hypothetical bug that returned a huge payload can't blow up downstream clients. Env-tunable via `RUVECTOR_MAX_RESPONSE_BYTES`, floor 4 KB. Worker startup banner now logs six DoS gates, layered by iter: iter 180: max_decoding_message_size = 65536; iter 181: max_concurrent_streams = 256; iter 182: request_timeout_secs = 30; iter 183: max_pending_resets = 32 (CVE-2023-44487); iter 184: http2_keepalive_secs = 60; iter 190: max_encoding_message_size = 16384. Pi regression bench (c=4 b=1, 8 s × 3, post-deploy): iter 189: 70.4, 70.1, 70.6 → mean 70.4/sec, p50=53-56 ms; iter 190: 68.9, 67.1, 70.6 → mean 68.9/sec, p50=55-56 ms. Δ = -2.1%, within the tailnet noise band; no encode-side enforcement fires on legitimate ~1.6 KB responses. Session test sweep (cargo test --features tls --tests -- --test-threads=1): - lib : 103/103 pass - all 13 integration suites : 74/74 pass - total : 177 tests, 0 failures - tls_roundtrip + secure_stack : 4/4 (TLS path validated) (One known-flaky test: rate_limit::tests::from_env_disabled_when_unset races other tests that set the same process-global env vars under the default parallel runner. Serial mode isolates it cleanly. Pre-existing issue, unrelated to iter 190.) 
Co-Authored-By: claude-flow --- .../ruvector-hailo-cluster/src/bin/worker.rs | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/crates/ruvector-hailo-cluster/src/bin/worker.rs b/crates/ruvector-hailo-cluster/src/bin/worker.rs index af3f1a65..17e87a6e 100644 --- a/crates/ruvector-hailo-cluster/src/bin/worker.rs +++ b/crates/ruvector-hailo-cluster/src/bin/worker.rs @@ -32,6 +32,13 @@ //! floor 4096). Caps per-RPC alloc surface //! well below tonic's ~4 MB transport //! default to shrink the DoS surface. +//! RUVECTOR_MAX_RESPONSE_BYTES gRPC max_encoding_message_size cap +//! (ADR-172 §3a iter 190 — default 16384, +//! floor 4096). Defense-in-depth on the +//! response side: the worker should never +//! emit > ~1.6 KB per embed, but capping +//! the encode budget bounds the blast +//! radius of any hypothetical leak. //! RUVECTOR_MAX_CONCURRENT_STREAMS HTTP/2 SETTINGS_MAX_CONCURRENT_STREAMS //! (ADR-172 §3a iter 181 — default 256, //! floor 8). Caps in-flight streams per @@ -716,12 +723,32 @@ fn main() -> Result<(), Box> { max_request_bytes = max_req_bytes, "gRPC max_decoding_message_size set (ADR-172 §3a iter 180 DoS gate)" ); + // Iter 190 — defense-in-depth response size cap. The worker + // controls its own response shape (Vec[384] ≈ 1.6 KB + + // tonic framing for unary embed; streaming embed pumps the + // same per-item shape), so the encode-side cap shouldn't fire + // under any normal code path. Setting it explicitly bounds + // the blast radius of a hypothetical bug that ever returned a + // huge response (e.g. an accidentally-leaked debug payload). + // 16 KB is 10× the legitimate per-message size and well + // outside any plausible legit response. 
+ let max_resp_bytes: usize = std::env::var("RUVECTOR_MAX_RESPONSE_BYTES") + .ok() + .and_then(|s| s.parse::().ok()) + .unwrap_or(16 * 1024) + .max(4 * 1024); + info!( + max_response_bytes = max_resp_bytes, + "gRPC max_encoding_message_size set (ADR-172 §3a iter 190 belt-and-suspenders)" + ); // Note: `max_decoding_message_size` lives on the generated // `EmbeddingServer`, not tonic's `InterceptedService` wrapper — // apply it before wrapping. The `with_interceptor` static // helper would re-build the inner with default limits, so we // skip it and call `InterceptedService::new` ourselves. - let embed_server = EmbeddingServer::new(svc).max_decoding_message_size(max_req_bytes); + let embed_server = EmbeddingServer::new(svc) + .max_decoding_message_size(max_req_bytes) + .max_encoding_message_size(max_resp_bytes); let intercepted = tonic::service::interceptor::InterceptedService::new( embed_server, interceptor,