fix: implement security audit findings for ruvector-solver (25 fixes)

Address 25 findings from the security and performance audit across 10 files:

CRITICAL:
- neumann.rs: fix .max() → .min() tolerance bug with f32 precision floor
  to prevent requesting impossible accuracy from f32 internal computation

HIGH (security):
- forward_push.rs: replace debug_assert! with runtime validate_params()
  returning Result<(), SolverError> with proper ValidationError variants
- cg.rs: upgrade all debug_assert_eq! to assert_eq! in dot_product_f64,
  dot_f64, axpy, axpy_f64, apply_preconditioner
- bmssp.rs: upgrade debug_assert_eq! to assert_eq! in sparse_matmul
- router.rs: upgrade debug_assert_eq! to assert_eq! with descriptive message
- forward_push.rs, backward_push.rs: add MAX_GRAPH_NODES (100M) limit
  to prevent OOM denial-of-service on untrusted graph inputs

HIGH (speed):
- neumann.rs: deduplicate extract_diag_inv_f32 by extracting
  estimate_spectral_radius_with_diag helper, reusing D^{-1} across
  spectral check and Jacobi iteration
- bmssp.rs: pre-allocate ax_buf to eliminate redundant SpMV allocation
  in V-cycle outer loop

MODERATE (security):
- forward_push.rs: guard float-to-usize overflow with .min(usize::MAX as f64)
- random_walk.rs: use saturating_mul(10) to prevent integer overflow
- neumann.rs: add tracing::warn! for zero/near-zero diagonal entries

MODERATE (speed):
- true_solver.rs: pre-allocate JL entries with Vec::with_capacity
- true_solver.rs: hoist dense row accumulators outside loops in
  project_matrix and compute_reduced_matrix
- router.rs: use binary_search instead of linear .any() for symmetry check
- bmssp.rs: optimize row swap with split_at_mut + swap_with_slice

LOW:
- types.rs: sort_by_key → sort_unstable_by_key in from_coo_generic
- types.rs: upgrade debug_assert! to assert! for bounds checks
- simd.rs: add complete f64 SIMD path (spmv_simd_f64, spmv_avx2_f64,
  spmv_scalar_f64) with AVX2 intrinsics and tests
- true_solver.rs: document structural cloning limitation with future
  refactoring note

https://claude.ai/code/session_01TiqLbr2DaNAntQHaVeLfiR
This commit is contained in:
Claude 2026-02-20 12:40:13 +00:00
parent 7ef759fa09
commit 27e4eab783
10 changed files with 258 additions and 44 deletions

View file

@ -41,6 +41,9 @@ use crate::types::{
SolverResult, SparsityProfile,
};
/// Maximum number of graph nodes to prevent OOM denial-of-service.
const MAX_GRAPH_NODES: usize = 100_000_000;
// ---------------------------------------------------------------------------
// Solver struct
// ---------------------------------------------------------------------------
@ -179,6 +182,16 @@ impl BackwardPushSolver {
let start = Instant::now();
let n = graph.rows;
if n > MAX_GRAPH_NODES {
return Err(SolverError::InvalidInput(
ValidationError::MatrixTooLarge {
rows: n,
cols: n,
max_dim: MAX_GRAPH_NODES,
},
));
}
// Build the transposed adjacency so row_entries(v) in `graph_t`
// yields the in-neighbours of v in the original graph.
let graph_t = graph.transpose();

View file

@ -404,7 +404,7 @@ fn transpose_csr(a: &CsrMatrix<f64>) -> CsrMatrix<f64> {
/// than reallocated, making this efficient for the moderate-sized matrices
/// produced during hierarchy construction.
fn sparse_matmul(a: &CsrMatrix<f64>, b: &CsrMatrix<f64>) -> CsrMatrix<f64> {
debug_assert_eq!(
assert_eq!(
a.cols, b.rows,
"sparse_matmul: dimension mismatch {}x{} * {}x{}",
a.rows, a.cols, b.rows, b.cols
@ -535,11 +535,14 @@ fn dense_direct_solve(matrix: &CsrMatrix<f64>, b: &[f64]) -> Vec<f64> {
}
if max_row != col {
for k in 0..stride {
let a_idx = col * stride + k;
let b_idx = max_row * stride + k;
aug.swap(a_idx, b_idx);
}
let (first, second) = if col < max_row {
let (left, right) = aug.split_at_mut(max_row * stride);
(&mut left[col * stride..col * stride + stride], &mut right[..stride])
} else {
let (left, right) = aug.split_at_mut(col * stride);
(&mut right[..stride], &mut left[max_row * stride..max_row * stride + stride])
};
first.swap_with_slice(second);
}
let pivot = aug[col * stride + col];
@ -769,6 +772,7 @@ impl SolverEngine for BmsspSolver {
// Solve phase: iterate V-cycles.
let mut x = vec![0.0f64; n];
let mut ax_buf = vec![0.0f64; n];
let mut convergence_history = Vec::with_capacity(max_iter);
let b_norm = {
@ -804,7 +808,8 @@ impl SolverEngine for BmsspSolver {
v_cycle(&hierarchy, &mut x, rhs, 0);
let res = residual_l2(matrix, &x, rhs);
matrix.spmv(&x, &mut ax_buf);
let res = (0..n).map(|i| { let r = rhs[i] - ax_buf[i]; r * r }).sum::<f64>().sqrt();
convergence_history.push(ConvergenceInfo {
iteration: iter,

View file

@ -74,7 +74,7 @@ use crate::types::{
/// Debug-asserts that `a.len() == b.len()`.
#[inline]
pub fn dot_product_f64(a: &[f32], b: &[f32]) -> f64 {
debug_assert_eq!(a.len(), b.len(), "dot_product_f64: length mismatch");
assert_eq!(a.len(), b.len(), "dot_product_f64: length mismatch");
let n = a.len();
let chunks = n / 4;
@ -106,7 +106,7 @@ pub fn dot_product_f64(a: &[f32], b: &[f32]) -> f64 {
/// Used internally when the working vectors are already `f64`.
#[inline]
fn dot_f64(a: &[f64], b: &[f64]) -> f64 {
debug_assert_eq!(a.len(), b.len(), "dot_f64: length mismatch");
assert_eq!(a.len(), b.len(), "dot_f64: length mismatch");
let n = a.len();
let chunks = n / 4;
@ -140,7 +140,7 @@ fn dot_f64(a: &[f64], b: &[f64]) -> f64 {
/// Debug-asserts that `x.len() == y.len()`.
#[inline]
pub fn axpy(alpha: f32, x: &[f32], y: &mut [f32]) {
debug_assert_eq!(x.len(), y.len(), "axpy: length mismatch");
assert_eq!(x.len(), y.len(), "axpy: length mismatch");
let n = x.len();
let chunks = n / 4;
@ -161,7 +161,7 @@ pub fn axpy(alpha: f32, x: &[f32], y: &mut [f32]) {
/// Compute `y[i] += alpha * x[i]` for all `i` (AXPY operation, `f64`).
#[inline]
fn axpy_f64(alpha: f64, x: &[f64], y: &mut [f64]) {
debug_assert_eq!(x.len(), y.len(), "axpy_f64: length mismatch");
assert_eq!(x.len(), y.len(), "axpy_f64: length mismatch");
let n = x.len();
let chunks = n / 4;
@ -349,8 +349,8 @@ impl ConjugateGradientSolver {
/// Apply the diagonal preconditioner: `z[i] = inv_diag[i] * r[i]`.
#[inline]
fn apply_preconditioner(inv_diag: &[f64], r: &[f64], z: &mut [f64]) {
debug_assert_eq!(inv_diag.len(), r.len());
debug_assert_eq!(r.len(), z.len());
assert_eq!(inv_diag.len(), r.len());
assert_eq!(r.len(), z.len());
let n = r.len();
let chunks = n / 4;

View file

@ -105,18 +105,40 @@ pub struct ForwardPushSolver {
impl ForwardPushSolver {
/// Create a new forward-push solver.
///
/// # Panics
///
/// Debug-asserts that `alpha` is in `(0, 1)` and `epsilon` is positive.
/// Parameters are validated lazily at the start of each computation
/// (see [`validate_params`](Self::validate_params)).
pub fn new(alpha: f64, epsilon: f64) -> Self {
debug_assert!(
alpha > 0.0 && alpha < 1.0,
"alpha must be in (0, 1), got {alpha}",
);
debug_assert!(epsilon > 0.0, "epsilon must be positive, got {epsilon}");
Self { alpha, epsilon }
}
/// Validate that `alpha` and `epsilon` are within acceptable ranges.
///
/// # Errors
///
/// - [`SolverError::InvalidInput`] if `alpha` is not in `(0, 1)` exclusive.
/// - [`SolverError::InvalidInput`] if `epsilon` is not positive.
fn validate_params(&self) -> Result<(), SolverError> {
if self.alpha <= 0.0 || self.alpha >= 1.0 {
return Err(SolverError::InvalidInput(
crate::error::ValidationError::ParameterOutOfRange {
name: "alpha".into(),
value: self.alpha.to_string(),
expected: "(0.0, 1.0) exclusive".into(),
},
));
}
if self.epsilon <= 0.0 {
return Err(SolverError::InvalidInput(
crate::error::ValidationError::ParameterOutOfRange {
name: "epsilon".into(),
value: self.epsilon.to_string(),
expected: "> 0.0".into(),
},
));
}
Ok(())
}
/// Create a solver with default parameters (`alpha = 0.85`,
/// `epsilon = 1e-6`).
pub fn default_params() -> Self {
@ -139,6 +161,7 @@ impl ForwardPushSolver {
graph: &CsrMatrix<f64>,
source: usize,
) -> Result<Vec<(usize, f64)>, SolverError> {
self.validate_params()?;
validate_vertex(graph, source, "source")?;
self.forward_push_core(graph, &[(source, 1.0)])
}
@ -166,12 +189,27 @@ impl ForwardPushSolver {
///
/// Uses a `VecDeque` work-queue with a membership bitvec to achieve
/// O(1/epsilon) total work, independent of graph size.
/// Maximum number of graph nodes to prevent OOM DoS.
const MAX_GRAPH_NODES: usize = 100_000_000;
fn forward_push_core(
&self,
graph: &CsrMatrix<f64>,
seeds: &[(usize, f64)],
) -> Result<Vec<(usize, f64)>, SolverError> {
self.validate_params()?;
let n = graph.rows;
if n > Self::MAX_GRAPH_NODES {
return Err(SolverError::InvalidInput(
crate::error::ValidationError::MatrixTooLarge {
rows: n,
cols: graph.cols,
max_dim: Self::MAX_GRAPH_NODES,
},
));
}
let mut estimate = vec![KahanAccumulator::new(); n];
let mut residual = vec![KahanAccumulator::new(); n];
@ -437,7 +475,7 @@ impl SolverEngine for ForwardPushSolver {
_profile: &SparsityProfile,
_n: usize,
) -> ComplexityEstimate {
let est_ops = (1.0 / self.epsilon) as usize;
let est_ops = (1.0 / self.epsilon).min(usize::MAX as f64) as usize;
ComplexityEstimate {
algorithm: Algorithm::ForwardPush,
estimated_flops: est_ops as u64 * 10,

View file

@ -120,8 +120,23 @@ impl NeumannSolver {
return 0.0;
}
// Extract diagonal and compute D^{-1}.
let d_inv = extract_diag_inv_f32(matrix);
Self::estimate_spectral_radius_with_diag(matrix, &d_inv)
}
/// Inner helper: estimate spectral radius using a pre-computed `d_inv`.
///
/// This avoids recomputing the diagonal inverse when the caller already
/// has it (e.g. `solve()` needs `d_inv` for both the spectral check and
/// the Jacobi iteration).
fn estimate_spectral_radius_with_diag(
matrix: &CsrMatrix<f32>,
d_inv: &[f32],
) -> f64 {
let n = matrix.rows;
if n == 0 {
return 0.0;
}
// Initialise with a deterministic pseudo-random unit vector.
let mut v: Vec<f32> = (0..n)
@ -222,10 +237,14 @@ impl NeumannSolver {
});
}
// Extract D^{-1} once — reused for both the spectral radius check
// and the Jacobi-preconditioned iteration that follows.
let d_inv = extract_diag_inv_f32(matrix);
// ------------------------------------------------------------------
// Spectral radius pre-check (10-step power iteration on I - D^{-1}A)
// ------------------------------------------------------------------
let rho = Self::estimate_spectral_radius(matrix);
let rho = Self::estimate_spectral_radius_with_diag(matrix, &d_inv);
if rho >= 1.0 {
warn!(rho, "spectral radius >= 1.0, Neumann series will diverge");
return Err(SolverError::SpectralRadiusExceeded {
@ -236,9 +255,6 @@ impl NeumannSolver {
}
info!(rho, "spectral radius check passed");
// Extract D^{-1} for Jacobi preconditioning.
let d_inv = extract_diag_inv_f32(matrix);
// ------------------------------------------------------------------
// Jacobi-preconditioned iteration (fused kernel)
//
@ -372,9 +388,14 @@ impl SolverEngine for NeumannSolver {
};
let f32_rhs: Vec<f32> = rhs.iter().map(|&v| v as f32).collect();
// Use the tighter of the solver's own budget and the caller's budget.
// Use the tighter of the solver's own tolerance and the caller's budget,
// but no tighter than f32 precision allows (the Neumann solver operates
// internally in f32, so residuals below ~f32::EPSILON are unreachable).
let max_iters = self.max_iterations.min(budget.max_iterations);
let tol = self.tolerance.max(budget.tolerance);
let tol = self
.tolerance
.min(budget.tolerance)
.max(f32::EPSILON as f64 * 4.0);
let inner_solver = NeumannSolver::new(tol, max_iters);
@ -439,6 +460,12 @@ fn extract_diag_inv_f32(matrix: &CsrMatrix<f32>) -> Vec<f32> {
let diag = matrix.values[idx];
if diag.abs() > 1e-15 {
d_inv[i] = 1.0 / diag;
} else {
warn!(
row = i,
diag_value = %diag,
"zero or near-zero diagonal entry; substituting 1.0 — matrix may be singular"
);
}
break;
}

View file

@ -429,7 +429,7 @@ impl SolverEngine for HybridRandomWalkSolver {
cdf.push(cumulative);
}
let walks = self.num_walks.min(budget.max_iterations * 10);
let walks = self.num_walks.min(budget.max_iterations.saturating_mul(10));
let mut counts = vec![0.0f64; n];
for _ in 0..walks {

View file

@ -540,8 +540,8 @@ impl SolverOrchestrator {
let col_start = matrix.row_ptr[col];
let col_end = matrix.row_ptr[col + 1];
let found = matrix.col_indices[col_start..col_end]
.iter()
.any(|&c| c == row);
.binary_search(&row)
.is_ok();
if !found {
symmetric_mismatches += 1;
}
@ -1130,7 +1130,7 @@ impl Default for SolverOrchestrator {
#[inline]
#[allow(dead_code)]
fn dot(a: &[f64], b: &[f64]) -> f64 {
debug_assert_eq!(a.len(), b.len());
assert_eq!(a.len(), b.len(), "dot: length mismatch {} vs {}", a.len(), b.len());
a.iter()
.zip(b.iter())
.map(|(&ai, &bi)| ai * bi)

View file

@ -120,6 +120,91 @@ unsafe fn horizontal_sum_f32x8(v: std::arch::x86_64::__m256) -> f32 {
_mm_cvtss_f32(result)
}
/// Sparse matrix-vector multiply with optional SIMD acceleration for f64.
///
/// Computes `y = A * x` where `A` is a CSR matrix of `f64` values.
pub fn spmv_simd_f64(matrix: &CsrMatrix<f64>, x: &[f64], y: &mut [f64]) {
assert_eq!(x.len(), matrix.cols, "x length must equal matrix.cols");
assert_eq!(y.len(), matrix.rows, "y length must equal matrix.rows");
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
{
if is_x86_feature_detected!("avx2") {
unsafe {
spmv_avx2_f64(matrix, x, y);
}
return;
}
}
spmv_scalar_f64(matrix, x, y);
}
/// Scalar fallback for f64 SpMV.
pub fn spmv_scalar_f64(matrix: &CsrMatrix<f64>, x: &[f64], y: &mut [f64]) {
for i in 0..matrix.rows {
let start = matrix.row_ptr[i];
let end = matrix.row_ptr[i + 1];
let mut sum = 0.0f64;
for idx in start..end {
let col = matrix.col_indices[idx];
sum += matrix.values[idx] * x[col];
}
y[i] = sum;
}
}
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn spmv_avx2_f64(matrix: &CsrMatrix<f64>, x: &[f64], y: &mut [f64]) {
use std::arch::x86_64::*;
for i in 0..matrix.rows {
let start = matrix.row_ptr[i];
let end = matrix.row_ptr[i + 1];
let len = end - start;
let mut accum = _mm256_setzero_pd();
let chunks = len / 4;
let remainder = len % 4;
for chunk in 0..chunks {
let base = start + chunk * 4;
let vals = _mm256_loadu_pd(matrix.values.as_ptr().add(base));
let mut x_buf = [0.0f64; 4];
for k in 0..4 {
let col = *matrix.col_indices.get_unchecked(base + k);
x_buf[k] = *x.get_unchecked(col);
}
let x_vec = _mm256_loadu_pd(x_buf.as_ptr());
accum = _mm256_add_pd(accum, _mm256_mul_pd(vals, x_vec));
}
let mut sum = horizontal_sum_f64x4(accum);
let tail_start = start + chunks * 4;
for idx in tail_start..(tail_start + remainder) {
let col = *matrix.col_indices.get_unchecked(idx);
sum += *matrix.values.get_unchecked(idx) * *x.get_unchecked(col);
}
*y.get_unchecked_mut(i) = sum;
}
}
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn horizontal_sum_f64x4(v: std::arch::x86_64::__m256d) -> f64 {
use std::arch::x86_64::*;
let hi = _mm256_extractf128_pd(v, 1);
let lo = _mm256_castpd256_pd128(v);
let sum128 = _mm_add_pd(lo, hi);
let hi64 = _mm_unpackhi_pd(sum128, sum128);
let result = _mm_add_sd(sum128, hi64);
_mm_cvtsd_f64(result)
}
#[cfg(test)]
mod tests {
use super::*;
@ -159,4 +244,38 @@ mod tests {
assert!((y[1] - 6.0).abs() < 1e-6);
assert!((y[2] - 13.0).abs() < 1e-6);
}
#[test]
fn spmv_simd_f64_correctness() {
let mat = CsrMatrix::<f64> {
values: vec![2.0, 1.0, 3.0, 1.0, 4.0],
col_indices: vec![0, 2, 1, 0, 2],
row_ptr: vec![0, 2, 3, 5],
rows: 3,
cols: 3,
};
let x = vec![1.0, 2.0, 3.0];
let mut y = vec![0.0f64; 3];
spmv_simd_f64(&mat, &x, &mut y);
assert!((y[0] - 5.0).abs() < 1e-10);
assert!((y[1] - 6.0).abs() < 1e-10);
assert!((y[2] - 13.0).abs() < 1e-10);
}
#[test]
fn scalar_spmv_f64_correctness() {
let mat = CsrMatrix::<f64> {
values: vec![2.0, 1.0, 3.0, 1.0, 4.0],
col_indices: vec![0, 2, 1, 0, 2],
row_ptr: vec![0, 2, 3, 5],
rows: 3,
cols: 3,
};
let x = vec![1.0, 2.0, 3.0];
let mut y = vec![0.0f64; 3];
spmv_scalar_f64(&mat, &x, &mut y);
assert!((y[0] - 5.0).abs() < 1e-10);
assert!((y[1] - 6.0).abs() < 1e-10);
assert!((y[2] - 13.0).abs() < 1e-10);
}
}

View file

@ -120,7 +120,7 @@ impl TrueSolver {
) -> Vec<(usize, usize, f32)> {
let scale = 1.0 / (k as f64).sqrt();
let scale_f32 = scale as f32;
let mut entries = Vec::new();
let mut entries = Vec::with_capacity(((k * n) as f64 / 3.0).ceil() as usize);
for row in 0..k {
for col in 0..n {
@ -169,11 +169,12 @@ impl TrueSolver {
// For each row i of Pi, compute B[i,:] = Pi[i,:] * A.
let mut b_entries: Vec<(usize, usize, f32)> = Vec::new();
// Hoist accumulator outside loop to avoid reallocating each iteration.
let mut b_row = vec![0.0f32; n];
for pi_row in 0..k {
let pi_start = pi.row_ptr[pi_row];
let pi_end = pi.row_ptr[pi_row + 1];
let mut b_row = vec![0.0f32; n];
for pi_idx in pi_start..pi_end {
let pi_col = pi.col_indices[pi_idx];
let pi_val = pi.values[pi_idx];
@ -190,6 +191,9 @@ impl TrueSolver {
b_entries.push((pi_row, col, val));
}
}
// Zero the accumulator for the next row.
b_row.iter_mut().for_each(|v| *v = 0.0);
}
let b_matrix = CsrMatrix::<f32>::from_coo(k, n, b_entries);
@ -207,11 +211,12 @@ impl TrueSolver {
let mut a_prime_entries: Vec<(usize, usize, f32)> = Vec::new();
for b_row in 0..k {
let b_start = b_matrix.row_ptr[b_row];
let b_end = b_matrix.row_ptr[b_row + 1];
// Hoist accumulator outside loop to avoid reallocating each iteration.
let mut row_accum = vec![0.0f32; k];
for b_row_idx in 0..k {
let b_start = b_matrix.row_ptr[b_row_idx];
let b_end = b_matrix.row_ptr[b_row_idx + 1];
let mut row_accum = vec![0.0f32; k];
for b_idx in b_start..b_end {
let l = b_matrix.col_indices[b_idx];
let b_val = b_matrix.values[b_idx];
@ -223,9 +228,12 @@ impl TrueSolver {
for (j, &val) in row_accum.iter().enumerate() {
if val.abs() > f32::EPSILON {
a_prime_entries.push((b_row, j, val));
a_prime_entries.push((b_row_idx, j, val));
}
}
// Zero the accumulator for the next row.
row_accum.iter_mut().for_each(|v| *v = 0.0);
}
CsrMatrix::<f32>::from_coo(k, k, a_prime_entries)
@ -560,7 +568,11 @@ impl SolverEngine for TrueSolver {
rhs: &[f64],
_budget: &crate::types::ComputeBudget,
) -> Result<SolverResult, SolverError> {
// Convert f64 input to f32 for internal computation
// Convert f64 input to f32 for internal computation.
// NOTE: row_ptr and col_indices are cloned here because CsrMatrix owns
// Vec<usize>, so we cannot borrow from the f64 matrix. A future
// refactor could introduce a CsrMatrixView that borrows structural
// arrays to eliminate these allocations on the f64 -> f32 path.
let f32_values: Vec<f32> = matrix.values.iter().map(|&v| v as f32).collect();
let f32_matrix = CsrMatrix {
row_ptr: matrix.row_ptr.clone(),

View file

@ -268,7 +268,7 @@ impl<T: Copy + Default + std::ops::AddAssign> CsrMatrix<T> {
entries: impl IntoIterator<Item = (usize, usize, T)>,
) -> Self {
let mut sorted: Vec<_> = entries.into_iter().collect();
sorted.sort_by_key(|(r, c, _)| (*r, *c));
sorted.sort_unstable_by_key(|(r, c, _)| (*r, *c));
let nnz = sorted.len();
let mut row_ptr = vec![0usize; rows + 1];
@ -276,7 +276,7 @@ impl<T: Copy + Default + std::ops::AddAssign> CsrMatrix<T> {
let mut values = Vec::with_capacity(nnz);
for &(r, _, _) in &sorted {
debug_assert!(r < rows, "row index {} out of bounds (rows={})", r, rows);
assert!(r < rows, "row index {} out of bounds (rows={})", r, rows);
row_ptr[r + 1] += 1;
}
for i in 1..=rows {
@ -284,7 +284,7 @@ impl<T: Copy + Default + std::ops::AddAssign> CsrMatrix<T> {
}
for (_, c, v) in sorted {
debug_assert!(c < cols, "col index {} out of bounds (cols={})", c, cols);
assert!(c < cols, "col index {} out of bounds (cols={})", c, cols);
col_indices.push(c);
values.push(v);
}