mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-06-01 06:10:31 +00:00
fix: implement security audit findings for ruvector-solver (25 fixes)
Address 25 findings from the security and performance audit across 10 files:
CRITICAL:
- neumann.rs: fix .max() → .min() tolerance bug with f32 precision floor
to prevent requesting impossible accuracy from f32 internal computation
HIGH (security):
- forward_push.rs: replace debug_assert! with runtime validate_params()
returning Result<(), SolverError> with proper ValidationError variants
- cg.rs: upgrade all debug_assert_eq! to assert_eq! in dot_product_f64,
dot_f64, axpy, axpy_f64, apply_preconditioner
- bmssp.rs: upgrade debug_assert_eq! to assert_eq! in sparse_matmul
- router.rs: upgrade debug_assert_eq! to assert_eq! with descriptive message
- forward_push.rs, backward_push.rs: add MAX_GRAPH_NODES (100M) limit
to prevent OOM denial-of-service on untrusted graph inputs
HIGH (speed):
- neumann.rs: deduplicate extract_diag_inv_f32 by extracting
estimate_spectral_radius_with_diag helper, reusing D^{-1} across
spectral check and Jacobi iteration
- bmssp.rs: pre-allocate ax_buf to eliminate redundant SpMV allocation
in V-cycle outer loop
MODERATE (security):
- forward_push.rs: guard float-to-usize overflow with .min(usize::MAX as f64)
- random_walk.rs: use saturating_mul(10) to prevent integer overflow
- neumann.rs: add tracing::warn! for zero/near-zero diagonal entries
MODERATE (speed):
- true_solver.rs: pre-allocate JL entries with Vec::with_capacity
- true_solver.rs: hoist dense row accumulators outside loops in
project_matrix and compute_reduced_matrix
- router.rs: use binary_search instead of linear .any() for symmetry check
- bmssp.rs: optimize row swap with split_at_mut + swap_with_slice
LOW:
- types.rs: sort_by_key → sort_unstable_by_key in from_coo_generic
- types.rs: upgrade debug_assert! to assert! for bounds checks
- simd.rs: add complete f64 SIMD path (spmv_simd_f64, spmv_avx2_f64,
spmv_scalar_f64) with AVX2 intrinsics and tests
- true_solver.rs: document structural cloning limitation with future
refactoring note
https://claude.ai/code/session_01TiqLbr2DaNAntQHaVeLfiR
This commit is contained in:
parent
7ef759fa09
commit
27e4eab783
10 changed files with 258 additions and 44 deletions
|
|
@ -41,6 +41,9 @@ use crate::types::{
|
|||
SolverResult, SparsityProfile,
|
||||
};
|
||||
|
||||
/// Maximum number of graph nodes to prevent OOM denial-of-service.
|
||||
const MAX_GRAPH_NODES: usize = 100_000_000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Solver struct
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -179,6 +182,16 @@ impl BackwardPushSolver {
|
|||
let start = Instant::now();
|
||||
let n = graph.rows;
|
||||
|
||||
if n > MAX_GRAPH_NODES {
|
||||
return Err(SolverError::InvalidInput(
|
||||
ValidationError::MatrixTooLarge {
|
||||
rows: n,
|
||||
cols: n,
|
||||
max_dim: MAX_GRAPH_NODES,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
// Build the transposed adjacency so row_entries(v) in `graph_t`
|
||||
// yields the in-neighbours of v in the original graph.
|
||||
let graph_t = graph.transpose();
|
||||
|
|
|
|||
|
|
@ -404,7 +404,7 @@ fn transpose_csr(a: &CsrMatrix<f64>) -> CsrMatrix<f64> {
|
|||
/// than reallocated, making this efficient for the moderate-sized matrices
|
||||
/// produced during hierarchy construction.
|
||||
fn sparse_matmul(a: &CsrMatrix<f64>, b: &CsrMatrix<f64>) -> CsrMatrix<f64> {
|
||||
debug_assert_eq!(
|
||||
assert_eq!(
|
||||
a.cols, b.rows,
|
||||
"sparse_matmul: dimension mismatch {}x{} * {}x{}",
|
||||
a.rows, a.cols, b.rows, b.cols
|
||||
|
|
@ -535,11 +535,14 @@ fn dense_direct_solve(matrix: &CsrMatrix<f64>, b: &[f64]) -> Vec<f64> {
|
|||
}
|
||||
|
||||
if max_row != col {
|
||||
for k in 0..stride {
|
||||
let a_idx = col * stride + k;
|
||||
let b_idx = max_row * stride + k;
|
||||
aug.swap(a_idx, b_idx);
|
||||
}
|
||||
let (first, second) = if col < max_row {
|
||||
let (left, right) = aug.split_at_mut(max_row * stride);
|
||||
(&mut left[col * stride..col * stride + stride], &mut right[..stride])
|
||||
} else {
|
||||
let (left, right) = aug.split_at_mut(col * stride);
|
||||
(&mut right[..stride], &mut left[max_row * stride..max_row * stride + stride])
|
||||
};
|
||||
first.swap_with_slice(second);
|
||||
}
|
||||
|
||||
let pivot = aug[col * stride + col];
|
||||
|
|
@ -769,6 +772,7 @@ impl SolverEngine for BmsspSolver {
|
|||
|
||||
// Solve phase: iterate V-cycles.
|
||||
let mut x = vec![0.0f64; n];
|
||||
let mut ax_buf = vec![0.0f64; n];
|
||||
let mut convergence_history = Vec::with_capacity(max_iter);
|
||||
|
||||
let b_norm = {
|
||||
|
|
@ -804,7 +808,8 @@ impl SolverEngine for BmsspSolver {
|
|||
|
||||
v_cycle(&hierarchy, &mut x, rhs, 0);
|
||||
|
||||
let res = residual_l2(matrix, &x, rhs);
|
||||
matrix.spmv(&x, &mut ax_buf);
|
||||
let res = (0..n).map(|i| { let r = rhs[i] - ax_buf[i]; r * r }).sum::<f64>().sqrt();
|
||||
|
||||
convergence_history.push(ConvergenceInfo {
|
||||
iteration: iter,
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ use crate::types::{
|
|||
/// Debug-asserts that `a.len() == b.len()`.
|
||||
#[inline]
|
||||
pub fn dot_product_f64(a: &[f32], b: &[f32]) -> f64 {
|
||||
debug_assert_eq!(a.len(), b.len(), "dot_product_f64: length mismatch");
|
||||
assert_eq!(a.len(), b.len(), "dot_product_f64: length mismatch");
|
||||
|
||||
let n = a.len();
|
||||
let chunks = n / 4;
|
||||
|
|
@ -106,7 +106,7 @@ pub fn dot_product_f64(a: &[f32], b: &[f32]) -> f64 {
|
|||
/// Used internally when the working vectors are already `f64`.
|
||||
#[inline]
|
||||
fn dot_f64(a: &[f64], b: &[f64]) -> f64 {
|
||||
debug_assert_eq!(a.len(), b.len(), "dot_f64: length mismatch");
|
||||
assert_eq!(a.len(), b.len(), "dot_f64: length mismatch");
|
||||
|
||||
let n = a.len();
|
||||
let chunks = n / 4;
|
||||
|
|
@ -140,7 +140,7 @@ fn dot_f64(a: &[f64], b: &[f64]) -> f64 {
|
|||
/// Debug-asserts that `x.len() == y.len()`.
|
||||
#[inline]
|
||||
pub fn axpy(alpha: f32, x: &[f32], y: &mut [f32]) {
|
||||
debug_assert_eq!(x.len(), y.len(), "axpy: length mismatch");
|
||||
assert_eq!(x.len(), y.len(), "axpy: length mismatch");
|
||||
|
||||
let n = x.len();
|
||||
let chunks = n / 4;
|
||||
|
|
@ -161,7 +161,7 @@ pub fn axpy(alpha: f32, x: &[f32], y: &mut [f32]) {
|
|||
/// Compute `y[i] += alpha * x[i]` for all `i` (AXPY operation, `f64`).
|
||||
#[inline]
|
||||
fn axpy_f64(alpha: f64, x: &[f64], y: &mut [f64]) {
|
||||
debug_assert_eq!(x.len(), y.len(), "axpy_f64: length mismatch");
|
||||
assert_eq!(x.len(), y.len(), "axpy_f64: length mismatch");
|
||||
|
||||
let n = x.len();
|
||||
let chunks = n / 4;
|
||||
|
|
@ -349,8 +349,8 @@ impl ConjugateGradientSolver {
|
|||
/// Apply the diagonal preconditioner: `z[i] = inv_diag[i] * r[i]`.
|
||||
#[inline]
|
||||
fn apply_preconditioner(inv_diag: &[f64], r: &[f64], z: &mut [f64]) {
|
||||
debug_assert_eq!(inv_diag.len(), r.len());
|
||||
debug_assert_eq!(r.len(), z.len());
|
||||
assert_eq!(inv_diag.len(), r.len());
|
||||
assert_eq!(r.len(), z.len());
|
||||
|
||||
let n = r.len();
|
||||
let chunks = n / 4;
|
||||
|
|
|
|||
|
|
@ -105,18 +105,40 @@ pub struct ForwardPushSolver {
|
|||
impl ForwardPushSolver {
|
||||
/// Create a new forward-push solver.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Debug-asserts that `alpha` is in `(0, 1)` and `epsilon` is positive.
|
||||
/// Parameters are validated lazily at the start of each computation
|
||||
/// (see [`validate_params`](Self::validate_params)).
|
||||
pub fn new(alpha: f64, epsilon: f64) -> Self {
|
||||
debug_assert!(
|
||||
alpha > 0.0 && alpha < 1.0,
|
||||
"alpha must be in (0, 1), got {alpha}",
|
||||
);
|
||||
debug_assert!(epsilon > 0.0, "epsilon must be positive, got {epsilon}");
|
||||
Self { alpha, epsilon }
|
||||
}
|
||||
|
||||
/// Validate that `alpha` and `epsilon` are within acceptable ranges.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// - [`SolverError::InvalidInput`] if `alpha` is not in `(0, 1)` exclusive.
|
||||
/// - [`SolverError::InvalidInput`] if `epsilon` is not positive.
|
||||
fn validate_params(&self) -> Result<(), SolverError> {
|
||||
if self.alpha <= 0.0 || self.alpha >= 1.0 {
|
||||
return Err(SolverError::InvalidInput(
|
||||
crate::error::ValidationError::ParameterOutOfRange {
|
||||
name: "alpha".into(),
|
||||
value: self.alpha.to_string(),
|
||||
expected: "(0.0, 1.0) exclusive".into(),
|
||||
},
|
||||
));
|
||||
}
|
||||
if self.epsilon <= 0.0 {
|
||||
return Err(SolverError::InvalidInput(
|
||||
crate::error::ValidationError::ParameterOutOfRange {
|
||||
name: "epsilon".into(),
|
||||
value: self.epsilon.to_string(),
|
||||
expected: "> 0.0".into(),
|
||||
},
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a solver with default parameters (`alpha = 0.85`,
|
||||
/// `epsilon = 1e-6`).
|
||||
pub fn default_params() -> Self {
|
||||
|
|
@ -139,6 +161,7 @@ impl ForwardPushSolver {
|
|||
graph: &CsrMatrix<f64>,
|
||||
source: usize,
|
||||
) -> Result<Vec<(usize, f64)>, SolverError> {
|
||||
self.validate_params()?;
|
||||
validate_vertex(graph, source, "source")?;
|
||||
self.forward_push_core(graph, &[(source, 1.0)])
|
||||
}
|
||||
|
|
@ -166,12 +189,27 @@ impl ForwardPushSolver {
|
|||
///
|
||||
/// Uses a `VecDeque` work-queue with a membership bitvec to achieve
|
||||
/// O(1/epsilon) total work, independent of graph size.
|
||||
/// Maximum number of graph nodes to prevent OOM DoS.
|
||||
const MAX_GRAPH_NODES: usize = 100_000_000;
|
||||
|
||||
fn forward_push_core(
|
||||
&self,
|
||||
graph: &CsrMatrix<f64>,
|
||||
seeds: &[(usize, f64)],
|
||||
) -> Result<Vec<(usize, f64)>, SolverError> {
|
||||
self.validate_params()?;
|
||||
|
||||
let n = graph.rows;
|
||||
if n > Self::MAX_GRAPH_NODES {
|
||||
return Err(SolverError::InvalidInput(
|
||||
crate::error::ValidationError::MatrixTooLarge {
|
||||
rows: n,
|
||||
cols: graph.cols,
|
||||
max_dim: Self::MAX_GRAPH_NODES,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
let mut estimate = vec![KahanAccumulator::new(); n];
|
||||
let mut residual = vec![KahanAccumulator::new(); n];
|
||||
|
||||
|
|
@ -437,7 +475,7 @@ impl SolverEngine for ForwardPushSolver {
|
|||
_profile: &SparsityProfile,
|
||||
_n: usize,
|
||||
) -> ComplexityEstimate {
|
||||
let est_ops = (1.0 / self.epsilon) as usize;
|
||||
let est_ops = (1.0 / self.epsilon).min(usize::MAX as f64) as usize;
|
||||
ComplexityEstimate {
|
||||
algorithm: Algorithm::ForwardPush,
|
||||
estimated_flops: est_ops as u64 * 10,
|
||||
|
|
|
|||
|
|
@ -120,8 +120,23 @@ impl NeumannSolver {
|
|||
return 0.0;
|
||||
}
|
||||
|
||||
// Extract diagonal and compute D^{-1}.
|
||||
let d_inv = extract_diag_inv_f32(matrix);
|
||||
Self::estimate_spectral_radius_with_diag(matrix, &d_inv)
|
||||
}
|
||||
|
||||
/// Inner helper: estimate spectral radius using a pre-computed `d_inv`.
|
||||
///
|
||||
/// This avoids recomputing the diagonal inverse when the caller already
|
||||
/// has it (e.g. `solve()` needs `d_inv` for both the spectral check and
|
||||
/// the Jacobi iteration).
|
||||
fn estimate_spectral_radius_with_diag(
|
||||
matrix: &CsrMatrix<f32>,
|
||||
d_inv: &[f32],
|
||||
) -> f64 {
|
||||
let n = matrix.rows;
|
||||
if n == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Initialise with a deterministic pseudo-random unit vector.
|
||||
let mut v: Vec<f32> = (0..n)
|
||||
|
|
@ -222,10 +237,14 @@ impl NeumannSolver {
|
|||
});
|
||||
}
|
||||
|
||||
// Extract D^{-1} once — reused for both the spectral radius check
|
||||
// and the Jacobi-preconditioned iteration that follows.
|
||||
let d_inv = extract_diag_inv_f32(matrix);
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Spectral radius pre-check (10-step power iteration on I - D^{-1}A)
|
||||
// ------------------------------------------------------------------
|
||||
let rho = Self::estimate_spectral_radius(matrix);
|
||||
let rho = Self::estimate_spectral_radius_with_diag(matrix, &d_inv);
|
||||
if rho >= 1.0 {
|
||||
warn!(rho, "spectral radius >= 1.0, Neumann series will diverge");
|
||||
return Err(SolverError::SpectralRadiusExceeded {
|
||||
|
|
@ -236,9 +255,6 @@ impl NeumannSolver {
|
|||
}
|
||||
info!(rho, "spectral radius check passed");
|
||||
|
||||
// Extract D^{-1} for Jacobi preconditioning.
|
||||
let d_inv = extract_diag_inv_f32(matrix);
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Jacobi-preconditioned iteration (fused kernel)
|
||||
//
|
||||
|
|
@ -372,9 +388,14 @@ impl SolverEngine for NeumannSolver {
|
|||
};
|
||||
let f32_rhs: Vec<f32> = rhs.iter().map(|&v| v as f32).collect();
|
||||
|
||||
// Use the tighter of the solver's own budget and the caller's budget.
|
||||
// Use the tighter of the solver's own tolerance and the caller's budget,
|
||||
// but no tighter than f32 precision allows (the Neumann solver operates
|
||||
// internally in f32, so residuals below ~f32::EPSILON are unreachable).
|
||||
let max_iters = self.max_iterations.min(budget.max_iterations);
|
||||
let tol = self.tolerance.max(budget.tolerance);
|
||||
let tol = self
|
||||
.tolerance
|
||||
.min(budget.tolerance)
|
||||
.max(f32::EPSILON as f64 * 4.0);
|
||||
|
||||
let inner_solver = NeumannSolver::new(tol, max_iters);
|
||||
|
||||
|
|
@ -439,6 +460,12 @@ fn extract_diag_inv_f32(matrix: &CsrMatrix<f32>) -> Vec<f32> {
|
|||
let diag = matrix.values[idx];
|
||||
if diag.abs() > 1e-15 {
|
||||
d_inv[i] = 1.0 / diag;
|
||||
} else {
|
||||
warn!(
|
||||
row = i,
|
||||
diag_value = %diag,
|
||||
"zero or near-zero diagonal entry; substituting 1.0 — matrix may be singular"
|
||||
);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -429,7 +429,7 @@ impl SolverEngine for HybridRandomWalkSolver {
|
|||
cdf.push(cumulative);
|
||||
}
|
||||
|
||||
let walks = self.num_walks.min(budget.max_iterations * 10);
|
||||
let walks = self.num_walks.min(budget.max_iterations.saturating_mul(10));
|
||||
let mut counts = vec![0.0f64; n];
|
||||
|
||||
for _ in 0..walks {
|
||||
|
|
|
|||
|
|
@ -540,8 +540,8 @@ impl SolverOrchestrator {
|
|||
let col_start = matrix.row_ptr[col];
|
||||
let col_end = matrix.row_ptr[col + 1];
|
||||
let found = matrix.col_indices[col_start..col_end]
|
||||
.iter()
|
||||
.any(|&c| c == row);
|
||||
.binary_search(&row)
|
||||
.is_ok();
|
||||
if !found {
|
||||
symmetric_mismatches += 1;
|
||||
}
|
||||
|
|
@ -1130,7 +1130,7 @@ impl Default for SolverOrchestrator {
|
|||
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn dot(a: &[f64], b: &[f64]) -> f64 {
|
||||
debug_assert_eq!(a.len(), b.len());
|
||||
assert_eq!(a.len(), b.len(), "dot: length mismatch {} vs {}", a.len(), b.len());
|
||||
a.iter()
|
||||
.zip(b.iter())
|
||||
.map(|(&ai, &bi)| ai * bi)
|
||||
|
|
|
|||
|
|
@ -120,6 +120,91 @@ unsafe fn horizontal_sum_f32x8(v: std::arch::x86_64::__m256) -> f32 {
|
|||
_mm_cvtss_f32(result)
|
||||
}
|
||||
|
||||
/// Sparse matrix-vector multiply with optional SIMD acceleration for f64.
|
||||
///
|
||||
/// Computes `y = A * x` where `A` is a CSR matrix of `f64` values.
|
||||
pub fn spmv_simd_f64(matrix: &CsrMatrix<f64>, x: &[f64], y: &mut [f64]) {
|
||||
assert_eq!(x.len(), matrix.cols, "x length must equal matrix.cols");
|
||||
assert_eq!(y.len(), matrix.rows, "y length must equal matrix.rows");
|
||||
|
||||
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
unsafe {
|
||||
spmv_avx2_f64(matrix, x, y);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
spmv_scalar_f64(matrix, x, y);
|
||||
}
|
||||
|
||||
/// Scalar fallback for f64 SpMV.
|
||||
pub fn spmv_scalar_f64(matrix: &CsrMatrix<f64>, x: &[f64], y: &mut [f64]) {
|
||||
for i in 0..matrix.rows {
|
||||
let start = matrix.row_ptr[i];
|
||||
let end = matrix.row_ptr[i + 1];
|
||||
let mut sum = 0.0f64;
|
||||
for idx in start..end {
|
||||
let col = matrix.col_indices[idx];
|
||||
sum += matrix.values[idx] * x[col];
|
||||
}
|
||||
y[i] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn spmv_avx2_f64(matrix: &CsrMatrix<f64>, x: &[f64], y: &mut [f64]) {
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
for i in 0..matrix.rows {
|
||||
let start = matrix.row_ptr[i];
|
||||
let end = matrix.row_ptr[i + 1];
|
||||
let len = end - start;
|
||||
|
||||
let mut accum = _mm256_setzero_pd();
|
||||
let chunks = len / 4;
|
||||
let remainder = len % 4;
|
||||
|
||||
for chunk in 0..chunks {
|
||||
let base = start + chunk * 4;
|
||||
let vals = _mm256_loadu_pd(matrix.values.as_ptr().add(base));
|
||||
|
||||
let mut x_buf = [0.0f64; 4];
|
||||
for k in 0..4 {
|
||||
let col = *matrix.col_indices.get_unchecked(base + k);
|
||||
x_buf[k] = *x.get_unchecked(col);
|
||||
}
|
||||
let x_vec = _mm256_loadu_pd(x_buf.as_ptr());
|
||||
accum = _mm256_add_pd(accum, _mm256_mul_pd(vals, x_vec));
|
||||
}
|
||||
|
||||
let mut sum = horizontal_sum_f64x4(accum);
|
||||
|
||||
let tail_start = start + chunks * 4;
|
||||
for idx in tail_start..(tail_start + remainder) {
|
||||
let col = *matrix.col_indices.get_unchecked(idx);
|
||||
sum += *matrix.values.get_unchecked(idx) * *x.get_unchecked(col);
|
||||
}
|
||||
|
||||
*y.get_unchecked_mut(i) = sum;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn horizontal_sum_f64x4(v: std::arch::x86_64::__m256d) -> f64 {
|
||||
use std::arch::x86_64::*;
|
||||
let hi = _mm256_extractf128_pd(v, 1);
|
||||
let lo = _mm256_castpd256_pd128(v);
|
||||
let sum128 = _mm_add_pd(lo, hi);
|
||||
let hi64 = _mm_unpackhi_pd(sum128, sum128);
|
||||
let result = _mm_add_sd(sum128, hi64);
|
||||
_mm_cvtsd_f64(result)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
@ -159,4 +244,38 @@ mod tests {
|
|||
assert!((y[1] - 6.0).abs() < 1e-6);
|
||||
assert!((y[2] - 13.0).abs() < 1e-6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spmv_simd_f64_correctness() {
|
||||
let mat = CsrMatrix::<f64> {
|
||||
values: vec![2.0, 1.0, 3.0, 1.0, 4.0],
|
||||
col_indices: vec![0, 2, 1, 0, 2],
|
||||
row_ptr: vec![0, 2, 3, 5],
|
||||
rows: 3,
|
||||
cols: 3,
|
||||
};
|
||||
let x = vec![1.0, 2.0, 3.0];
|
||||
let mut y = vec![0.0f64; 3];
|
||||
spmv_simd_f64(&mat, &x, &mut y);
|
||||
assert!((y[0] - 5.0).abs() < 1e-10);
|
||||
assert!((y[1] - 6.0).abs() < 1e-10);
|
||||
assert!((y[2] - 13.0).abs() < 1e-10);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scalar_spmv_f64_correctness() {
|
||||
let mat = CsrMatrix::<f64> {
|
||||
values: vec![2.0, 1.0, 3.0, 1.0, 4.0],
|
||||
col_indices: vec![0, 2, 1, 0, 2],
|
||||
row_ptr: vec![0, 2, 3, 5],
|
||||
rows: 3,
|
||||
cols: 3,
|
||||
};
|
||||
let x = vec![1.0, 2.0, 3.0];
|
||||
let mut y = vec![0.0f64; 3];
|
||||
spmv_scalar_f64(&mat, &x, &mut y);
|
||||
assert!((y[0] - 5.0).abs() < 1e-10);
|
||||
assert!((y[1] - 6.0).abs() < 1e-10);
|
||||
assert!((y[2] - 13.0).abs() < 1e-10);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ impl TrueSolver {
|
|||
) -> Vec<(usize, usize, f32)> {
|
||||
let scale = 1.0 / (k as f64).sqrt();
|
||||
let scale_f32 = scale as f32;
|
||||
let mut entries = Vec::new();
|
||||
let mut entries = Vec::with_capacity(((k * n) as f64 / 3.0).ceil() as usize);
|
||||
|
||||
for row in 0..k {
|
||||
for col in 0..n {
|
||||
|
|
@ -169,11 +169,12 @@ impl TrueSolver {
|
|||
// For each row i of Pi, compute B[i,:] = Pi[i,:] * A.
|
||||
let mut b_entries: Vec<(usize, usize, f32)> = Vec::new();
|
||||
|
||||
// Hoist accumulator outside loop to avoid reallocating each iteration.
|
||||
let mut b_row = vec![0.0f32; n];
|
||||
for pi_row in 0..k {
|
||||
let pi_start = pi.row_ptr[pi_row];
|
||||
let pi_end = pi.row_ptr[pi_row + 1];
|
||||
|
||||
let mut b_row = vec![0.0f32; n];
|
||||
for pi_idx in pi_start..pi_end {
|
||||
let pi_col = pi.col_indices[pi_idx];
|
||||
let pi_val = pi.values[pi_idx];
|
||||
|
|
@ -190,6 +191,9 @@ impl TrueSolver {
|
|||
b_entries.push((pi_row, col, val));
|
||||
}
|
||||
}
|
||||
|
||||
// Zero the accumulator for the next row.
|
||||
b_row.iter_mut().for_each(|v| *v = 0.0);
|
||||
}
|
||||
|
||||
let b_matrix = CsrMatrix::<f32>::from_coo(k, n, b_entries);
|
||||
|
|
@ -207,11 +211,12 @@ impl TrueSolver {
|
|||
|
||||
let mut a_prime_entries: Vec<(usize, usize, f32)> = Vec::new();
|
||||
|
||||
for b_row in 0..k {
|
||||
let b_start = b_matrix.row_ptr[b_row];
|
||||
let b_end = b_matrix.row_ptr[b_row + 1];
|
||||
// Hoist accumulator outside loop to avoid reallocating each iteration.
|
||||
let mut row_accum = vec![0.0f32; k];
|
||||
for b_row_idx in 0..k {
|
||||
let b_start = b_matrix.row_ptr[b_row_idx];
|
||||
let b_end = b_matrix.row_ptr[b_row_idx + 1];
|
||||
|
||||
let mut row_accum = vec![0.0f32; k];
|
||||
for b_idx in b_start..b_end {
|
||||
let l = b_matrix.col_indices[b_idx];
|
||||
let b_val = b_matrix.values[b_idx];
|
||||
|
|
@ -223,9 +228,12 @@ impl TrueSolver {
|
|||
|
||||
for (j, &val) in row_accum.iter().enumerate() {
|
||||
if val.abs() > f32::EPSILON {
|
||||
a_prime_entries.push((b_row, j, val));
|
||||
a_prime_entries.push((b_row_idx, j, val));
|
||||
}
|
||||
}
|
||||
|
||||
// Zero the accumulator for the next row.
|
||||
row_accum.iter_mut().for_each(|v| *v = 0.0);
|
||||
}
|
||||
|
||||
CsrMatrix::<f32>::from_coo(k, k, a_prime_entries)
|
||||
|
|
@ -560,7 +568,11 @@ impl SolverEngine for TrueSolver {
|
|||
rhs: &[f64],
|
||||
_budget: &crate::types::ComputeBudget,
|
||||
) -> Result<SolverResult, SolverError> {
|
||||
// Convert f64 input to f32 for internal computation
|
||||
// Convert f64 input to f32 for internal computation.
|
||||
// NOTE: row_ptr and col_indices are cloned here because CsrMatrix owns
|
||||
// Vec<usize>, so we cannot borrow from the f64 matrix. A future
|
||||
// refactor could introduce a CsrMatrixView that borrows structural
|
||||
// arrays to eliminate these allocations on the f64 -> f32 path.
|
||||
let f32_values: Vec<f32> = matrix.values.iter().map(|&v| v as f32).collect();
|
||||
let f32_matrix = CsrMatrix {
|
||||
row_ptr: matrix.row_ptr.clone(),
|
||||
|
|
|
|||
|
|
@ -268,7 +268,7 @@ impl<T: Copy + Default + std::ops::AddAssign> CsrMatrix<T> {
|
|||
entries: impl IntoIterator<Item = (usize, usize, T)>,
|
||||
) -> Self {
|
||||
let mut sorted: Vec<_> = entries.into_iter().collect();
|
||||
sorted.sort_by_key(|(r, c, _)| (*r, *c));
|
||||
sorted.sort_unstable_by_key(|(r, c, _)| (*r, *c));
|
||||
|
||||
let nnz = sorted.len();
|
||||
let mut row_ptr = vec![0usize; rows + 1];
|
||||
|
|
@ -276,7 +276,7 @@ impl<T: Copy + Default + std::ops::AddAssign> CsrMatrix<T> {
|
|||
let mut values = Vec::with_capacity(nnz);
|
||||
|
||||
for &(r, _, _) in &sorted {
|
||||
debug_assert!(r < rows, "row index {} out of bounds (rows={})", r, rows);
|
||||
assert!(r < rows, "row index {} out of bounds (rows={})", r, rows);
|
||||
row_ptr[r + 1] += 1;
|
||||
}
|
||||
for i in 1..=rows {
|
||||
|
|
@ -284,7 +284,7 @@ impl<T: Copy + Default + std::ops::AddAssign> CsrMatrix<T> {
|
|||
}
|
||||
|
||||
for (_, c, v) in sorted {
|
||||
debug_assert!(c < cols, "col index {} out of bounds (cols={})", c, cols);
|
||||
assert!(c < cols, "col index {} out of bounds (cols={})", c, cols);
|
||||
col_indices.push(c);
|
||||
values.push(v);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue