mirror of
https://github.com/ruvnet/RuView.git
synced 2026-04-28 05:59:32 +00:00
hardening(ruvector): L2 from PR #435 review — overflow on >u16::MAX dims
Pass 1.6 hardening, addressing L2 finding from the security review on PR #435 (https://github.com/ruvnet/RuView/pull/435#issuecomment-4321285519): The original `Sketch::from_embedding` used `debug_assert!` for the `embedding.len() <= u16::MAX` invariant, which compiled out in release builds. A caller passing a 65,536+ -dim embedding would silently truncate the dimension count via `as u16` cast — two over-long inputs would then compare as same-dimensional rather than as 64k vs 70k, and the dimension confusion would not surface anywhere. Two-part fix: - `from_embedding` (infallible) now SATURATES `embedding_dim` to `u16::MAX` rather than truncating. Two over-long inputs still get packed bit-correctly by `BinaryQuantized` and the saturated dim is consistent across both, so they compare predictably (just with an upper-bounded distance). - `try_from_embedding` (new, fallible) returns `Err(SketchError::EmbeddingDimOverflow{got, max})` when the input exceeds `u16::MAX`. Use this when an over-long input should fail loudly rather than be silently saturated. - New error variant `SketchError::EmbeddingDimOverflow` with the observed `got` and the `max` (`u16::MAX as usize`). - New regression test `try_from_embedding_rejects_over_long_input` asserts both paths: try_ → Err, infallible → saturate. Validated: - 13 sketch unit tests pass (was 12; +1 for L2 boundary). - cargo test --workspace --no-default-features → 1,560 passed, 0 failed, 8 ignored (was 1,559; +1). - ESP32-S3 on COM7 streaming live CSI (cb #100, fresh boot RSSI -48 dBm). Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
4e536149f6
commit
4dd1f794d6
1 changed files with 69 additions and 5 deletions
|
|
@ -67,6 +67,22 @@ pub enum SketchError {
|
|||
/// Dimension on the incoming sketch.
|
||||
query: u16,
|
||||
},
|
||||
|
||||
/// Embedding dimension exceeds `u16::MAX` (65,535).
|
||||
///
|
||||
/// Returned by [`Sketch::try_from_embedding`] to surface what
|
||||
/// `from_embedding`'s `debug_assert!` would have hidden in release
|
||||
/// builds — silently truncating the dimension count would otherwise
|
||||
/// let two different-length embeddings compare as if they were the
|
||||
/// same length. See ADR-084 §"Versioning" and the security-review
|
||||
/// finding L2 on PR #435 for context.
|
||||
#[error("embedding dimension {got} exceeds u16::MAX ({max})")]
|
||||
EmbeddingDimOverflow {
|
||||
/// Actual length of the input embedding.
|
||||
got: usize,
|
||||
/// Maximum supported dimension (`u16::MAX`).
|
||||
max: usize,
|
||||
},
|
||||
}
|
||||
|
||||
/// A 1-bit binary sketch of a dense embedding vector.
|
||||
|
|
@ -105,17 +121,39 @@ impl Sketch {
|
|||
/// (e.g., a re-trained AETHER head). Two sketches with different
|
||||
/// `sketch_version`s are not comparable.
|
||||
pub fn from_embedding(embedding: &[f32], sketch_version: u16) -> Self {
|
||||
debug_assert!(
|
||||
embedding.len() <= u16::MAX as usize,
|
||||
"embedding dimension exceeds u16::MAX"
|
||||
);
|
||||
// L2 hardening (PR #435 security review): in release builds the
|
||||
// previous `debug_assert!` was compiled out, allowing silent
|
||||
// u16-truncation when `embedding.len() > u16::MAX`. Saturate to
|
||||
// u16::MAX rather than truncate so two over-long embeddings
|
||||
// compare as same-dimensional rather than as accidentally-short.
|
||||
// Callers that need a hard error should use `try_from_embedding`.
|
||||
let embedding_dim = embedding.len().min(u16::MAX as usize) as u16;
|
||||
Self {
|
||||
inner: BinaryQuantized::quantize(embedding),
|
||||
embedding_dim: embedding.len() as u16,
|
||||
embedding_dim,
|
||||
sketch_version,
|
||||
}
|
||||
}
|
||||
|
||||
/// Fallible constructor that rejects embeddings longer than
|
||||
/// `u16::MAX` (65,535) instead of saturating, raising
|
||||
/// [`SketchError::EmbeddingDimOverflow`]. Use this when an
|
||||
/// over-long input should fail loudly rather than silently
|
||||
/// produce a sketch that disagrees with its source on
|
||||
/// `embedding_dim`.
|
||||
pub fn try_from_embedding(
|
||||
embedding: &[f32],
|
||||
sketch_version: u16,
|
||||
) -> Result<Self, SketchError> {
|
||||
if embedding.len() > u16::MAX as usize {
|
||||
return Err(SketchError::EmbeddingDimOverflow {
|
||||
got: embedding.len(),
|
||||
max: u16::MAX as usize,
|
||||
});
|
||||
}
|
||||
Ok(Self::from_embedding(embedding, sketch_version))
|
||||
}
|
||||
|
||||
/// Hamming distance to another sketch in `[0, embedding_dim]`.
|
||||
///
|
||||
/// Returns `None` if the two sketches have different `embedding_dim` or
|
||||
|
|
@ -486,6 +524,32 @@ mod tests {
|
|||
assert!((novelty - 0.5).abs() < 1e-6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn try_from_embedding_rejects_over_long_input() {
|
||||
// L2 security-review finding (PR #435): the infallible
|
||||
// `from_embedding` saturates to u16::MAX; the fallible
|
||||
// `try_from_embedding` must surface the overflow so callers can
|
||||
// detect the misuse. We can't actually allocate a 65,536-f32
|
||||
// vector in unit tests cheaply (that's 256 KiB, fine), but we
|
||||
// can fabricate a `Vec` with `len() > u16::MAX` and check the
|
||||
// error path.
|
||||
let too_long: Vec<f32> = vec![0.5; (u16::MAX as usize) + 1];
|
||||
let err = Sketch::try_from_embedding(&too_long, 1).unwrap_err();
|
||||
match err {
|
||||
SketchError::EmbeddingDimOverflow { got, max } => {
|
||||
assert_eq!(got, (u16::MAX as usize) + 1);
|
||||
assert_eq!(max, u16::MAX as usize);
|
||||
}
|
||||
_ => panic!("expected EmbeddingDimOverflow, got {err:?}"),
|
||||
}
|
||||
|
||||
// The infallible path should *saturate* to u16::MAX rather
|
||||
// than panic in release. Verify the saturation is observable
|
||||
// on `embedding_dim()`.
|
||||
let s = Sketch::from_embedding(&too_long, 1);
|
||||
assert_eq!(s.embedding_dim(), u16::MAX);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn topk_rejects_query_with_wrong_schema() {
|
||||
let mut bank = SketchBank::with_schema(4, 1);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue