hardening(ruvector): L2 from PR #435 review — overflow on >u16::MAX dims

Pass 1.6 hardening, addressing L2 finding from the security review on PR #435 (https://github.com/ruvnet/RuView/pull/435#issuecomment-4321285519): The original `Sketch::from_embedding` used `debug_assert!` for the `embedding.len() <= u16::MAX` invariant, which compiled out in release builds. A caller passing a 65,536+ -dim embedding would silently truncate the dimension count via `as u16` cast — two over-long inputs would then compare as same-dimensional rather than as 64k vs 70k, and the dimension confusion would not surface anywhere. Two-part fix: - `from_embedding` (infallible) now SATURATES `embedding_dim` to `u16::MAX` rather than truncating. Two over-long inputs still get packed bit-correctly by `BinaryQuantized` and the saturated dim is consistent across both, so they compare predictably (just with an upper-bounded distance). - `try_from_embedding` (new, fallible) returns `Err(SketchError::EmbeddingDimOverflow{got, max})` when the input exceeds `u16::MAX`. Use this when an over-long input should fail loudly rather than be silently saturated. - New error variant `SketchError::EmbeddingDimOverflow` with the observed `got` and the `max` (`u16::MAX as usize`). - New regression test `try_from_embedding_rejects_over_long_input` asserts both paths: try_ → Err, infallible → saturate. Validated: - 13 sketch unit tests pass (was 12; +1 for L2 boundary). - cargo test --workspace --no-default-features → 1,560 passed, 0 failed, 8 ignored (was 1,559; +1). - ESP32-S3 on COM7 streaming live CSI (cb #100, fresh boot RSSI -48 dBm). Co-Authored-By: claude-flow <ruv@ruv.net>
2026-04-28 05:59:32 +00:00 · 2026-04-26 00:56:32 -04:00 · 2026-04-26 00:56:32 -04:00 · 4dd1f794d6
commit 4dd1f794d6
parent 4e536149f6
1 changed files with 69 additions and 5 deletions
--- a/v2/crates/wifi-densepose-ruvector/src/sketch.rs
+++ b/v2/crates/wifi-densepose-ruvector/src/sketch.rs
@ -67,6 +67,22 @@ pub enum SketchError {
        /// Dimension on the incoming sketch.
        query: u16,
    },
+
+    /// Embedding dimension exceeds `u16::MAX` (65,535).
+    ///
+    /// Returned by [`Sketch::try_from_embedding`] to surface what
+    /// `from_embedding`'s `debug_assert!` would have hidden in release
+    /// builds — silently truncating the dimension count would otherwise
+    /// let two different-length embeddings compare as if they were the
+    /// same length. See ADR-084 §"Versioning" and the security-review
+    /// finding L2 on PR #435 for context.
+    #[error("embedding dimension {got} exceeds u16::MAX ({max})")]
+    EmbeddingDimOverflow {
+        /// Actual length of the input embedding.
+        got: usize,
+        /// Maximum supported dimension (`u16::MAX`).
+        max: usize,
+    },
 }

 /// A 1-bit binary sketch of a dense embedding vector.
@ -105,17 +121,39 @@ impl Sketch {
    /// (e.g., a re-trained AETHER head). Two sketches with different
    /// `sketch_version`s are not comparable.
    pub fn from_embedding(embedding: &[f32], sketch_version: u16) -> Self {
-        debug_assert!(
-            embedding.len() <= u16::MAX as usize,
-            "embedding dimension exceeds u16::MAX"
-        );
+        // L2 hardening (PR #435 security review): in release builds the
+        // previous `debug_assert!` was compiled out, allowing silent
+        // u16-truncation when `embedding.len() > u16::MAX`. Saturate to
+        // u16::MAX rather than truncate so two over-long embeddings
+        // compare as same-dimensional rather than as accidentally-short.
+        // Callers that need a hard error should use `try_from_embedding`.
+        let embedding_dim = embedding.len().min(u16::MAX as usize) as u16;
        Self {
            inner: BinaryQuantized::quantize(embedding),
-            embedding_dim: embedding.len() as u16,
+            embedding_dim,
            sketch_version,
        }
    }

+    /// Fallible constructor that rejects embeddings longer than
+    /// `u16::MAX` (65,535) instead of saturating, raising
+    /// [`SketchError::EmbeddingDimOverflow`]. Use this when an
+    /// over-long input should fail loudly rather than silently
+    /// produce a sketch that disagrees with its source on
+    /// `embedding_dim`.
+    pub fn try_from_embedding(
+        embedding: &[f32],
+        sketch_version: u16,
+    ) -> Result<Self, SketchError> {
+        if embedding.len() > u16::MAX as usize {
+            return Err(SketchError::EmbeddingDimOverflow {
+                got: embedding.len(),
+                max: u16::MAX as usize,
+            });
+        }
+        Ok(Self::from_embedding(embedding, sketch_version))
+    }
+
    /// Hamming distance to another sketch in `[0, embedding_dim]`.
    ///
    /// Returns `None` if the two sketches have different `embedding_dim` or
@ -486,6 +524,32 @@ mod tests {
        assert!((novelty - 0.5).abs() < 1e-6);
    }

+    #[test]
+    fn try_from_embedding_rejects_over_long_input() {
+        // L2 security-review finding (PR #435): the infallible
+        // `from_embedding` saturates to u16::MAX; the fallible
+        // `try_from_embedding` must surface the overflow so callers can
+        // detect the misuse. We can't actually allocate a 65,536-f32
+        // vector in unit tests cheaply (that's 256 KiB, fine), but we
+        // can fabricate a `Vec` with `len() > u16::MAX` and check the
+        // error path.
+        let too_long: Vec<f32> = vec![0.5; (u16::MAX as usize) + 1];
+        let err = Sketch::try_from_embedding(&too_long, 1).unwrap_err();
+        match err {
+            SketchError::EmbeddingDimOverflow { got, max } => {
+                assert_eq!(got, (u16::MAX as usize) + 1);
+                assert_eq!(max, u16::MAX as usize);
+            }
+            _ => panic!("expected EmbeddingDimOverflow, got {err:?}"),
+        }
+
+        // The infallible path should *saturate* to u16::MAX rather
+        // than panic in release. Verify the saturation is observable
+        // on `embedding_dim()`.
+        let s = Sketch::from_embedding(&too_long, 1);
+        assert_eq!(s.embedding_dim(), u16::MAX);
+    }
+
    #[test]
    fn topk_rejects_query_with_wrong_schema() {
        let mut bank = SketchBank::with_schema(4, 1);