diff --git a/v2/Cargo.lock b/v2/Cargo.lock index ac7f8df9..d478175d 100644 --- a/v2/Cargo.lock +++ b/v2/Cargo.lock @@ -8389,6 +8389,7 @@ dependencies = [ "ruvector-temporal-tensor", "serde", "serde_json", + "sha2", "thiserror 1.0.69", ] diff --git a/v2/crates/wifi-densepose-ruvector/Cargo.toml b/v2/crates/wifi-densepose-ruvector/Cargo.toml index 6ae29e00..0a0b6150 100644 --- a/v2/crates/wifi-densepose-ruvector/Cargo.toml +++ b/v2/crates/wifi-densepose-ruvector/Cargo.toml @@ -27,6 +27,10 @@ thiserror = { workspace = true } serde = { workspace = true, optional = true } serde_json = { workspace = true, optional = true } +# ADR-084 Pass 5 — privacy-preserving event log uses SHA-256 to +# anchor each stored sketch as a content-addressable witness hash. +sha2 = { workspace = true } + [dev-dependencies] approx = "0.5" criterion = { workspace = true } diff --git a/v2/crates/wifi-densepose-ruvector/src/event_log.rs b/v2/crates/wifi-densepose-ruvector/src/event_log.rs new file mode 100644 index 00000000..73e98da9 --- /dev/null +++ b/v2/crates/wifi-densepose-ruvector/src/event_log.rs @@ -0,0 +1,266 @@ +//! ADR-084 Pass 5 — privacy-preserving event log. +//! +//! Stores `(timestamp, sketch, novelty, witness_sha256)` tuples instead +//! of raw float embeddings. Two privacy properties matter: +//! +//! 1. **Non-invertibility.** The 1-bit sketch is lossy — there is no +//! general mathematical inverse from a stored event back to a +//! `[f32]` source embedding. Even an attacker with side-channel +//! information about the embedding model's output distribution +//! cannot reconstruct the underlying CSI. +//! +//! 2. **Content addressing.** Each event carries a SHA-256 of the +//! serialized [`crate::WireSketch`] payload (header + packed bits + q15 novelty). +//! Two events with the same `witness` are byte-identical on the wire — the cluster-Pi +//! can deduplicate, the gateway can checkpoint without re-storing, +//! and downstream verifiers can prove "this event came from that +//! 
sketch" without ever holding the original embedding. +//! +//! See ADR-084 §"Privacy-preserving event log" and the post-merge +//! security review on PR #435 (finding L7) for context. +//! +//! # Bounded by design +//! +//! [`PrivacyEventLog`] is a fixed-capacity ring buffer; once full, +//! oldest events are FIFO-evicted. A misbehaving sender cannot exhaust +//! receiver memory by flooding the log — peak footprint is +//! `capacity × (sketch_bytes + 50)` bytes. + +use sha2::{Digest, Sha256}; +use std::collections::VecDeque; + +use crate::sketch::{Sketch, WireSketch}; + +/// One entry in the privacy-preserving event log. +/// +/// All fields are public so callers can serialize / inspect / forward +/// events through their own pipelines without going through getters. +/// The struct is intentionally self-contained — no references to +/// external state, so an event can be moved across thread / process / +/// host boundaries without dangling. +#[derive(Debug, Clone, PartialEq)] +pub struct NoveltyEvent { + /// Microseconds since UNIX epoch when the underlying frame was + /// observed. Caller-supplied; the event log doesn't fetch the + /// clock so test fixtures are deterministic. + pub timestamp_us: u64, + /// 1-bit packed sketch bytes (`(embedding_dim + 7) / 8` bytes long). + pub sketch_bytes: Vec, + /// Embedding-model schema version so `(version, witness)` is a + /// fully qualified content address. + pub sketch_version: u16, + /// Source-embedding dimension, fixing the bit count of `sketch_bytes`. + pub embedding_dim: u16, + /// Novelty score in `[0.0, 1.0]` at the time the event was logged. + /// Stored as f32 exactly as the caller passed it (the log does not + /// clamp; saturate before pushing); the q15 quantization happens on + /// the wire format ([`crate::WireSketch`]) — the in-memory log keeps + /// full f32 precision. + pub novelty: f32, + /// SHA-256 of the serialized [`crate::WireSketch`] payload + /// (header + packed bits + the q15 novelty quantum). 
Two events + /// with the same witness are byte-identical on the wire. + pub witness_sha256: [u8; 32], +} + +/// Fixed-capacity, FIFO-evicting log of [`NoveltyEvent`]s. +/// +/// Used as the cluster-Pi's per-node anomaly trail. The log is **not** +/// the source of truth for novelty (that's [`crate::SketchBank`] and +/// `EmbeddingHistory::novelty`); it's the *audit* of what happened. +/// +/// # Memory bound +/// +/// `capacity * (sketch_bytes_per_event + ~50 fixed bytes)` is the worst +/// case. For 64 events × 16-byte sketches that's ~4 KiB — fits in any +/// per-node state struct without concern. +#[derive(Debug, Clone)] +pub struct PrivacyEventLog { + capacity: usize, + events: VecDeque, +} + +impl PrivacyEventLog { + /// Create a new log with the given fixed capacity. + /// + /// `capacity == 0` is allowed; the log accepts pushes but + /// immediately discards them, which is occasionally useful as a + /// no-op stub in test fixtures or when the privacy log is meant + /// to be disabled at deployment time. + pub fn new(capacity: usize) -> Self { + Self { + capacity, + events: VecDeque::with_capacity(capacity.min(1024)), + } + } + + /// Append an event built from a `Sketch` + novelty score. + /// + /// The event's `witness_sha256` is computed over the [`WireSketch`] + /// serialization of `(sketch, novelty)` — so two pushes of the same + /// `(sketch, novelty)` produce byte-identical witnesses, enabling + /// dedup at the receiver. + /// + /// FIFO-evicts the oldest event if the log is at capacity. Returns + /// the number of events present after the push (0 when capacity is + /// 0, otherwise `<= capacity`). 
+ pub fn push(&mut self, sketch: &Sketch, novelty: f32, timestamp_us: u64) -> usize { + if self.capacity == 0 { + return 0; + } + let wire = WireSketch::serialize(sketch, novelty); + let mut hasher = Sha256::new(); + hasher.update(&wire); + let witness: [u8; 32] = hasher.finalize().into(); + + if self.events.len() >= self.capacity { + self.events.pop_front(); + } + self.events.push_back(NoveltyEvent { + timestamp_us, + sketch_bytes: sketch.packed_bytes().to_vec(), + sketch_version: sketch.sketch_version(), + embedding_dim: sketch.embedding_dim(), + novelty, + witness_sha256: witness, + }); + self.events.len() + } + + /// Number of events currently stored. + #[inline] + pub fn len(&self) -> usize { + self.events.len() + } + + /// True iff the log has no events. + #[inline] + pub fn is_empty(&self) -> bool { + self.events.is_empty() + } + + /// Log capacity (the max number of events ever held simultaneously). + #[inline] + pub fn capacity(&self) -> usize { + self.capacity + } + + /// Iterate over events oldest-first. + pub fn iter(&self) -> impl Iterator { + self.events.iter() + } + + /// Find the most recent event whose `witness_sha256` matches. + /// Returns `None` if no event matches. + /// + /// Used by content-addressable lookups — a downstream receiver + /// can ask "have you logged this exact `(sketch, novelty)` before?" + /// without re-transmitting the sketch. 
+ pub fn find_by_witness(&self, witness: &[u8; 32]) -> Option<&NoveltyEvent> { + self.events + .iter() + .rev() + .find(|e| &e.witness_sha256 == witness) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sketch::Sketch; + + fn make_sketch(seed: u32) -> Sketch { + let v: Vec = (0..32) + .map(|i| ((i as u32).wrapping_mul(seed) as f32).sin()) + .collect(); + Sketch::from_embedding(&v, 1) + } + + #[test] + fn push_grows_until_capacity_then_fifo_evicts() { + let mut log = PrivacyEventLog::new(3); + for i in 0..5u64 { + log.push(&make_sketch(i as u32 + 1), 0.5, i * 1000); + } + assert_eq!(log.len(), 3, "must cap at capacity"); + // Oldest two evicted; first remaining timestamp is 2_000. + let first = log.iter().next().unwrap(); + assert_eq!(first.timestamp_us, 2000); + } + + #[test] + fn zero_capacity_log_silently_drops_pushes() { + let mut log = PrivacyEventLog::new(0); + let n = log.push(&make_sketch(1), 0.5, 0); + assert_eq!(n, 0); + assert_eq!(log.len(), 0); + assert!(log.is_empty()); + } + + #[test] + fn witness_is_deterministic_for_same_sketch_and_novelty() { + let mut log_a = PrivacyEventLog::new(2); + let mut log_b = PrivacyEventLog::new(2); + let s = make_sketch(7); + // Same sketch + same novelty + (intentionally different) + // timestamps — witness must NOT depend on timestamp; the + // wire format does not include it. 
+ log_a.push(&s, 0.25, 100); + log_b.push(&s, 0.25, 999_999); + let wa = log_a.iter().next().unwrap().witness_sha256; + let wb = log_b.iter().next().unwrap().witness_sha256; + assert_eq!(wa, wb, "witness must be content-addressable, not time-addressable"); + } + + #[test] + fn witness_differs_for_different_novelty_scores() { + let mut log = PrivacyEventLog::new(2); + let s = make_sketch(11); + log.push(&s, 0.10, 0); + log.push(&s, 0.90, 0); + let mut iter = log.iter(); + let w0 = iter.next().unwrap().witness_sha256; + let w1 = iter.next().unwrap().witness_sha256; + assert_ne!(w0, w1, "different novelty → different witness"); + } + + #[test] + fn find_by_witness_returns_most_recent_match() { + let mut log = PrivacyEventLog::new(5); + let s = make_sketch(42); + log.push(&s, 0.5, 100); + log.push(&make_sketch(99), 0.3, 200); + log.push(&s, 0.5, 300); // duplicate by witness, newer timestamp + + let target_witness = log.iter().nth(2).unwrap().witness_sha256; + let hit = log.find_by_witness(&target_witness).unwrap(); + assert_eq!(hit.timestamp_us, 300, "find_by_witness returns most recent"); + } + + #[test] + fn find_by_witness_returns_none_on_miss() { + let mut log = PrivacyEventLog::new(2); + log.push(&make_sketch(1), 0.5, 0); + let bogus = [0xAA_u8; 32]; + assert!(log.find_by_witness(&bogus).is_none()); + } + + #[test] + fn event_does_not_carry_raw_embedding() { + // The whole point of the event log: an attacker with read + // access to the log cannot recover the source CSI / embedding. + // Verify structurally that no `Vec<f32>` field exists on + // NoveltyEvent — only the bit-packed sketch. + let mut log = PrivacyEventLog::new(1); + let s = make_sketch(5); + log.push(&s, 0.5, 0); + let event = log.iter().next().unwrap(); + // The packed sketch is bytes (1-bit-per-source-dim, ceil-divided). + // Length proves the source dim (32 bits = 4 bytes). 
+ assert_eq!(event.sketch_bytes.len(), 4); + assert_eq!(event.embedding_dim, 32); + // No way to reconstruct the original `[f32; 32]` from these 4 bytes + // alone; that's the privacy guarantee. (Compile-time witnessed: + // there's no Vec<f32> field on NoveltyEvent.) + } +} diff --git a/v2/crates/wifi-densepose-ruvector/src/lib.rs b/v2/crates/wifi-densepose-ruvector/src/lib.rs index e562bb0e..89e4f14b 100644 --- a/v2/crates/wifi-densepose-ruvector/src/lib.rs +++ b/v2/crates/wifi-densepose-ruvector/src/lib.rs @@ -28,11 +28,13 @@ #[cfg(feature = "crv")] pub mod crv; +pub mod event_log; pub mod mat; pub mod signal; pub mod sketch; pub mod viewpoint; +pub use event_log::{NoveltyEvent, PrivacyEventLog}; pub use sketch::{ Sketch, SketchBank, SketchError, WireSketch, WireSketchError, WIRE_SKETCH_FORMAT_VERSION, WIRE_SKETCH_MAGIC, WIRE_SKETCH_MAX_BYTES, diff --git a/v2/crates/wifi-densepose-sensing-server/src/types.rs b/v2/crates/wifi-densepose-sensing-server/src/types.rs index 68ff977f..401ebc23 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/types.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/types.rs @@ -332,8 +332,16 @@ impl NodeState { Some(h) => h, None => return, }; - // Truncate or zero-pad to the canonical dim. f64 → f32 is a - // direct cast; CSI amplitudes are well within f32 range. + // Truncate or zero-pad to the canonical dim. + // + // L4 hardening (PR #435 security review): the `as f32` cast + // accepts adversarial f64 inputs without panic. `f64::INFINITY` + // becomes `f32::INFINITY` (sign-quantizes to bit=1; novelty + // degrades but no crash). `f64::NAN` propagates as `f32::NAN` + // (sign-quantizes to bit=0 since `NaN > 0.0` is false). CSI + // amplitudes from healthy ESP32 firmware are well within f32 + // finite range — adversarial input degrades novelty quality + // but never causes the gate to panic. let mut feature: Vec = amplitudes .iter() .take(NOVELTY_VECTOR_DIM)