sec(hailo): expose --tls-ca / mTLS flags on the stats CLI (iter 189)

Completes the client-side TLS flag surface across all three operator
tools in this repo. iter-187 added the bench flags, iter-188 added
the embed flags; iter-189 brings the stats CLI to parity so an op
can snapshot fleet stats from a TLS-configured worker without
building a custom client. Same `#[cfg(feature = "tls")]` gating, same
partial-config + orphan-flag refusals as the other two binaries.

Smoke-tested against cognitum-v0:

  $ ruvector-hailo-stats --workers 100.77.59.83:50051 --tls-domain example.com
  Error: "--tls-domain / --tls-client-cert / --tls-client-key require --tls-ca"

  $ ruvector-hailo-stats --workers 100.77.59.83:50051 --tls-ca /nonexistent/ca.pem
  Error: "--tls-ca: transport error to <tls>: read ca pem at /nonexistent/ca.pem: No such file or directory (os error 2)"

  $ ruvector-hailo-stats --workers 100.77.59.83:50051
  worker     address                fingerprint    npu_t0  npu_t1  embeds  errors  avg_us  max_us  up_s
  static-0   100.77.59.83:50051     9c56e596...    53.2    52.7    6614    0       27325   42930   1044

Pi regression bench (c=4 b=1, 8 s × 3, post-settle):

  iter-188: 70.3, 69.0, 67.9 → mean 69.1/sec, p50=55-57 ms
  iter-189: 70.4, 70.1, 70.6 → mean 70.4/sec, p50=53-56 ms, p99=86-90 ms

  Δ throughput: +1.9% (within noise; stats CLI changes don't touch
                the bench/embed code paths)

The TLS server-side path (iter 99) is now fully callable from every
client tool that ships with the cluster crate. Next direction is
either deferred ops work (Pi-side cert generation + systemd unit
wiring for end-to-end mTLS smoke) or a pivot to perf research
(async vstream, mask-aware HEF compile).

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
ruvnet 2026-05-03 18:30:46 -04:00
parent 168051bc1e
commit e7614036ec

View file

@ -47,6 +47,17 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// Useful for verifying a --workers-file manifest expands as expected,
// or for resolving a tailscale tag → IPs without hitting the workers.
let mut list_workers = false;
// Iter 189 — TLS / mTLS knobs (mirror of iter-187 bench + iter-188
// embed). Lets ops snapshot fleet stats from a TLS-configured
// worker with the same flag surface as the other client tools.
#[cfg(feature = "tls")]
let mut tls_ca: Option<String> = None;
#[cfg(feature = "tls")]
let mut tls_domain: Option<String> = None;
#[cfg(feature = "tls")]
let mut tls_client_cert: Option<String> = None;
#[cfg(feature = "tls")]
let mut tls_client_key: Option<String> = None;
let mut i = 1;
while i < args.len() {
@ -82,6 +93,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
"--strict-homogeneous" => { strict_homogeneous = true; i += 1; }
"--list-workers" => { list_workers = true; i += 1; }
#[cfg(feature = "tls")]
"--tls-ca" => { tls_ca = args.get(i + 1).cloned(); i += 2; }
#[cfg(feature = "tls")]
"--tls-domain" => { tls_domain = args.get(i + 1).cloned(); i += 2; }
#[cfg(feature = "tls")]
"--tls-client-cert" => { tls_client_cert = args.get(i + 1).cloned(); i += 2; }
#[cfg(feature = "tls")]
"--tls-client-key" => { tls_client_key = args.get(i + 1).cloned(); i += 2; }
"--help" | "-h" => { print_help(); return Ok(()); }
"--version" | "-V" => {
println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
@ -154,6 +173,45 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// Cluster is built solely to call fleet_stats — dim + fingerprint
// values don't matter for that path.
//
// Iter 189 — TLS transport when --tls-ca is set. Same partial-config
// + orphan-flag refusals as iter-187/188 client tools.
#[cfg(feature = "tls")]
let transport = {
if let Some(ca_path) = tls_ca.as_deref() {
let addr0 = workers.first().map(|w| w.address.clone()).unwrap_or_default();
let domain = tls_domain.clone().unwrap_or_else(|| {
ruvector_hailo_cluster::tls::domain_from_address(&addr0).to_string()
});
let mut tls = ruvector_hailo_cluster::tls::TlsClient::from_pem_files(ca_path, &domain)
.map_err(|e| format!("--tls-ca: {}", e))?;
match (tls_client_cert.as_deref(), tls_client_key.as_deref()) {
(Some(c), Some(k)) => {
tls = tls.with_client_identity(c, k)
.map_err(|e| format!("--tls-client-cert/--tls-client-key: {}", e))?;
}
(Some(_), None) | (None, Some(_)) => {
return Err(
"--tls-client-cert and --tls-client-key must both be set or both unset".into(),
);
}
(None, None) => {}
}
Arc::new(GrpcTransport::with_tls(
std::time::Duration::from_secs(5),
std::time::Duration::from_secs(2),
tls,
)?)
} else {
if tls_domain.is_some() || tls_client_cert.is_some() || tls_client_key.is_some() {
return Err(
"--tls-domain / --tls-client-cert / --tls-client-key require --tls-ca".into(),
);
}
Arc::new(GrpcTransport::new()?)
}
};
#[cfg(not(feature = "tls"))]
let transport = Arc::new(GrpcTransport::new()?);
let cluster = HailoClusterEmbedder::new(workers, transport, 1, "")?;
@ -570,6 +628,19 @@ OPTIONS:
--workers-file manifest or resolving
a tailscale tag without hitting
workers.
--tls-ca <path> Enable HTTPS by trusting the PEM CA
bundle at <path>. Without this the
stats CLI dials plaintext gRPC.
(Requires --features tls.)
--tls-domain <name> SNI / SAN value to assert against
the server cert. Defaults to the
hostname half of the first worker
address.
--tls-client-cert <path> mTLS client cert (PEM). Pair with
--tls-client-key.
--tls-client-key <path> mTLS client private key (PEM). Both
cert and key must be set or both
unset.
--help, -h Print this help and exit.
--version, -V Print the binary name + version and exit.