mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-30 03:53:34 +00:00
sec(hailo): max_encoding_message_size cap + session test sweep (iter 190)
Defense-in-depth response cap on the gRPC server. iter-180 capped the decode side at 64 KB; the encode side was uncapped (tonic default usize::MAX) even though the worker only ever generates Vec<f32>[384] ≈ 1.6 KB per unary embed. Cap at 16 KB (10× legitimate per-message size) so any hypothetical bug that ever returned a huge payload can't blow up downstream clients. Env-tunable via `RUVECTOR_MAX_RESPONSE_BYTES`, floor 4 KB.

Worker startup banner now logs six DoS gates layered by iter:
  iter 180: max_decoding_message_size = 65536
  iter 181: max_concurrent_streams = 256
  iter 182: request_timeout_secs = 30
  iter 183: max_pending_resets = 32 (CVE-2023-44487)
  iter 184: http2_keepalive_secs = 60
  iter 190: max_encoding_message_size = 16384

Pi regression bench (c=4 b=1, 8 s × 3, post-deploy):
  iter 189: 70.4, 70.1, 70.6 → mean 70.4/sec, p50=53-56 ms
  iter 190: 68.9, 67.1, 70.6 → mean 68.9/sec, p50=55-56 ms
  Δ -2.1% in tailnet noise band; no encode-side enforcement firing on legitimate ~1.6 KB responses.

Session test sweep (cargo test --features tls --tests --test-threads=1):
  - lib : 103/103 pass
  - all 13 integration suites : 74/74 pass
  - total : 177 tests, 0 failures
  - tls_roundtrip + secure_stack : 4/4 (TLS path validated)

(One known-flaky test: rate_limit::tests::from_env_disabled_when_unset races other tests that set the same process-global env vars on the default parallel runner. Serial mode isolates it cleanly. Pre-existing issue, unrelated to iter 190.)

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
e7614036ec
commit
4e192bb6d6
1 changed file with 28 additions and 1 deletion
|
|
@@ -32,6 +32,13 @@
|
|||
//! floor 4096). Caps per-RPC alloc surface
|
||||
//! well below tonic's ~4 MB transport
|
||||
//! default to shrink the DoS surface.
|
||||
//! RUVECTOR_MAX_RESPONSE_BYTES gRPC max_encoding_message_size cap
|
||||
//! (ADR-172 §3a iter 190 — default 16384,
|
||||
//! floor 4096). Defense-in-depth on the
|
||||
//! response side: the worker should never
|
||||
//! emit > ~1.6 KB per embed, but capping
|
||||
//! the encode budget bounds the blast
|
||||
//! radius of any hypothetical leak.
|
||||
//! RUVECTOR_MAX_CONCURRENT_STREAMS HTTP/2 SETTINGS_MAX_CONCURRENT_STREAMS
|
||||
//! (ADR-172 §3a iter 181 — default 256,
|
||||
//! floor 8). Caps in-flight streams per
|
||||
|
|
@@ -716,12 +723,32 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
max_request_bytes = max_req_bytes,
|
||||
"gRPC max_decoding_message_size set (ADR-172 §3a iter 180 DoS gate)"
|
||||
);
|
||||
// Iter 190 — defense-in-depth response size cap. The worker
|
||||
// controls its own response shape (Vec<f32>[384] ≈ 1.6 KB +
|
||||
// tonic framing for unary embed; streaming embed pumps the
|
||||
// same per-item shape), so the encode-side cap shouldn't fire
|
||||
// under any normal code path. Setting it explicitly bounds
|
||||
// the blast radius of a hypothetical bug that ever returned a
|
||||
// huge response (e.g. an accidentally-leaked debug payload).
|
||||
// 16 KB is 10× the legitimate per-message size and well
|
||||
// outside any plausible legit response.
|
||||
let max_resp_bytes: usize = std::env::var("RUVECTOR_MAX_RESPONSE_BYTES")
|
||||
.ok()
|
||||
.and_then(|s| s.parse::<usize>().ok())
|
||||
.unwrap_or(16 * 1024)
|
||||
.max(4 * 1024);
|
||||
info!(
|
||||
max_response_bytes = max_resp_bytes,
|
||||
"gRPC max_encoding_message_size set (ADR-172 §3a iter 190 belt-and-suspenders)"
|
||||
);
|
||||
// Note: `max_decoding_message_size` lives on the generated
|
||||
// `EmbeddingServer`, not tonic's `InterceptedService` wrapper —
|
||||
// apply it before wrapping. The `with_interceptor` static
|
||||
// helper would re-build the inner with default limits, so we
|
||||
// skip it and call `InterceptedService::new` ourselves.
|
||||
let embed_server = EmbeddingServer::new(svc).max_decoding_message_size(max_req_bytes);
|
||||
let embed_server = EmbeddingServer::new(svc)
|
||||
.max_decoding_message_size(max_req_bytes)
|
||||
.max_encoding_message_size(max_resp_bytes);
|
||||
let intercepted = tonic::service::interceptor::InterceptedService::new(
|
||||
embed_server,
|
||||
interceptor,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue