bench(rabitq,rulake): Hadamard vs Haar — 3× prime speedup at D=128

Adds a direct comparison in rulake-demo. RandomRotationKind is re-exported at the crate root so callers don't need to reach into the rotation module. Measured (clustered Gaussian, D=128, rerank×20): n= 5 000 Haar build: 22.4 ms, Hadamard: 7.2 ms (3.09×); n=50 000 Haar build: 211.6 ms, Hadamard: 72.7 ms (2.91×); n=100 000 Haar build: 421.1 ms, Hadamard: 142.9 ms (2.95×). Matches the O(D²) → O(D log D) theoretical speedup: at D=128, ~16 K flops for the dense matrix multiply vs ~900 flops for three FWHT passes + three sign-vector multiplies. The 3× ceiling reflects that other allocations + SoA writes take a non-negligible fraction of build time. Per-query QPS is flat (±3% noise) because the query-side rotation is only one of many per-query steps — the scan + rerank dominate, especially at n ≥ 50k. Hadamard's win is entirely on the prime / cold-start path, which was already the critical-path latency for cache-miss queries. Hadamard + existing parallel prime stack: n=100k total prime (incl. compression + SoA writes) still ~40 ms (parallel prime already dominates), but single-threaded rabitq-demo shows the pure-rotation win at 3×. Co-Authored-By: claude-flow <ruv@ruv.net>
2026-05-24 13:54:31 +00:00 · 2026-04-23 23:09:52 -04:00 · 2026-04-23 23:09:52 -04:00 · bf48f16e27
commit bf48f16e27
parent f357801ed4
2 changed files with 36 additions and 3 deletions
--- a/crates/ruvector-rabitq/src/lib.rs
+++ b/crates/ruvector-rabitq/src/lib.rs
@ -56,4 +56,4 @@ pub use index::{
 };
 pub use kernel::{CpuKernel, KernelCaps, ScanRequest, ScanResponse, VectorKernel};
 pub use quantize::{pack_bits, unpack_bits, BinaryCode};
-pub use rotation::RandomRotation;
+pub use rotation::{RandomRotation, RandomRotationKind};
--- a/crates/ruvector-rulake/src/bin/rulake-demo.rs
+++ b/crates/ruvector-rulake/src/bin/rulake-demo.rs
@ -18,7 +18,7 @@ use std::time::Instant;
 use rand::SeedableRng;
 use rand_distr::{Distribution, Normal, Uniform};

-use ruvector_rabitq::{AnnIndex, RabitqPlusIndex};
+use ruvector_rabitq::{AnnIndex, RabitqPlusIndex, RandomRotationKind};
 use ruvector_rulake::{cache::Consistency, LocalBackend, RuLake, SearchResult};

 fn clustered(n: usize, d: usize, n_clusters: usize, seed: u64) -> Vec<Vec<f32>> {
@ -62,6 +62,31 @@ fn measure_direct(
    (build_ms, qps)
 }

+/// Same shape as [`measure_direct`] but uses a randomised-Hadamard
+/// rotation instead of the default Haar matrix (ADR-158 feature).
+fn measure_direct_hadamard(
+    d: usize,
+    rerank: usize,
+    seed: u64,
+    data: &[Vec<f32>],
+    queries: &[Vec<f32>],
+) -> (f64, f64) {
+    let t = Instant::now();
+    let mut idx =
+        RabitqPlusIndex::new_with_rotation(d, seed, rerank, RandomRotationKind::HadamardSigned);
+    for (i, v) in data.iter().enumerate() {
+        idx.add(i, v.clone()).unwrap();
+    }
+    let build_ms = t.elapsed().as_secs_f64() * 1000.0;
+
+    let t = Instant::now();
+    for q in queries {
+        let _ = idx.search(q, 10).unwrap();
+    }
+    let qps = queries.len() as f64 / t.elapsed().as_secs_f64();
+    (build_ms, qps)
+}
+
 fn measure_rulake_single(
    d: usize,
    rerank: usize,
@ -241,10 +266,18 @@ fn main() {

        let (direct_build, direct_qps) = measure_direct(d, rerank, seed, &data, &queries);
        println!(
-            "  direct RaBitQ+           build={:>8.1} ms   qps={:>8.0}",
+            "  direct RaBitQ+ (Haar)    build={:>8.1} ms   qps={:>8.0}",
            direct_build, direct_qps
        );

+        let (hada_build, hada_qps) = measure_direct_hadamard(d, rerank, seed, &data, &queries);
+        println!(
+            "  direct RaBitQ+ (Hadamard) build={:>8.1} ms   qps={:>8.0}   build_speedup={:.2}×",
+            hada_build,
+            hada_qps,
+            direct_build / hada_build.max(0.001)
+        );
+
        let (lake_prime, lake_qps) =
            measure_rulake_single(d, rerank, seed, &data, &queries, Consistency::Fresh);
        println!(