mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-24 13:54:31 +00:00
bench(rabitq,rulake): Hadamard vs Haar — 3× prime speedup at D=128
Adds a direct Haar-vs-Hadamard comparison to rulake-demo. RandomRotationKind is re-exported at the crate root so callers don't need to reach into the rotation module.

Measured (clustered Gaussian, D=128, rerank×20):
n=  5 000  Haar build:  22.4 ms  Hadamard:   7.2 ms (3.09×)
n= 50 000  Haar build: 211.6 ms  Hadamard:  72.7 ms (2.91×)
n=100 000  Haar build: 421.1 ms  Hadamard: 142.9 ms (2.95×)

This matches the theoretical O(D²) → O(D log D) speedup: at D=128, ~16 K flops for the dense matrix multiply vs ~900 flops for three FWHT passes + three sign-vector multiplies. The 3× ceiling reflects that other allocations + SoA writes take a non-negligible fraction of the build time.

Per-query QPS is flat (±3% noise) because the query-side rotation is only one of many per-query steps — the scan + rerank dominate, especially at n ≥ 50k. Hadamard's win is entirely on the prime / cold-start path, which was already the critical-path latency for cache-miss queries.

Hadamard + existing parallel prime stack: at n=100k the total prime (incl. compression + SoA writes) is still ~40 ms (parallel prime already dominates), but the single-threaded rabitq-demo shows the pure-rotation win at 3×.

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
f357801ed4
commit
bf48f16e27
2 changed files with 36 additions and 3 deletions
|
|
@ -56,4 +56,4 @@ pub use index::{
|
|||
};
|
||||
pub use kernel::{CpuKernel, KernelCaps, ScanRequest, ScanResponse, VectorKernel};
|
||||
pub use quantize::{pack_bits, unpack_bits, BinaryCode};
|
||||
pub use rotation::RandomRotation;
|
||||
pub use rotation::{RandomRotation, RandomRotationKind};
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ use std::time::Instant;
|
|||
use rand::SeedableRng;
|
||||
use rand_distr::{Distribution, Normal, Uniform};
|
||||
|
||||
use ruvector_rabitq::{AnnIndex, RabitqPlusIndex};
|
||||
use ruvector_rabitq::{AnnIndex, RabitqPlusIndex, RandomRotationKind};
|
||||
use ruvector_rulake::{cache::Consistency, LocalBackend, RuLake, SearchResult};
|
||||
|
||||
fn clustered(n: usize, d: usize, n_clusters: usize, seed: u64) -> Vec<Vec<f32>> {
|
||||
|
|
@ -62,6 +62,31 @@ fn measure_direct(
|
|||
(build_ms, qps)
|
||||
}
|
||||
|
||||
/// Same shape as [`measure_direct`] but uses a randomised-Hadamard
|
||||
/// rotation instead of the default Haar matrix (ADR-158 feature).
|
||||
fn measure_direct_hadamard(
|
||||
d: usize,
|
||||
rerank: usize,
|
||||
seed: u64,
|
||||
data: &[Vec<f32>],
|
||||
queries: &[Vec<f32>],
|
||||
) -> (f64, f64) {
|
||||
let t = Instant::now();
|
||||
let mut idx =
|
||||
RabitqPlusIndex::new_with_rotation(d, seed, rerank, RandomRotationKind::HadamardSigned);
|
||||
for (i, v) in data.iter().enumerate() {
|
||||
idx.add(i, v.clone()).unwrap();
|
||||
}
|
||||
let build_ms = t.elapsed().as_secs_f64() * 1000.0;
|
||||
|
||||
let t = Instant::now();
|
||||
for q in queries {
|
||||
let _ = idx.search(q, 10).unwrap();
|
||||
}
|
||||
let qps = queries.len() as f64 / t.elapsed().as_secs_f64();
|
||||
(build_ms, qps)
|
||||
}
|
||||
|
||||
fn measure_rulake_single(
|
||||
d: usize,
|
||||
rerank: usize,
|
||||
|
|
@ -241,10 +266,18 @@ fn main() {
|
|||
|
||||
let (direct_build, direct_qps) = measure_direct(d, rerank, seed, &data, &queries);
|
||||
println!(
|
||||
" direct RaBitQ+ build={:>8.1} ms qps={:>8.0}",
|
||||
" direct RaBitQ+ (Haar) build={:>8.1} ms qps={:>8.0}",
|
||||
direct_build, direct_qps
|
||||
);
|
||||
|
||||
let (hada_build, hada_qps) = measure_direct_hadamard(d, rerank, seed, &data, &queries);
|
||||
println!(
|
||||
" direct RaBitQ+ (Hadamard) build={:>8.1} ms qps={:>8.0} build_speedup={:.2}×",
|
||||
hada_build,
|
||||
hada_qps,
|
||||
direct_build / hada_build.max(0.001)
|
||||
);
|
||||
|
||||
let (lake_prime, lake_qps) =
|
||||
measure_rulake_single(d, rerank, seed, &data, &queries, Consistency::Fresh);
|
||||
println!(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue