mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-24 22:15:18 +00:00
feat(sparse-mario): iter 4 — bench dense vs sparse vs sparse+FastGRNN
Adds `benches/sparse_mario_bench.rs` exercising the retrieval workload
shape (heads=1, head_dim=64, non-causal, window=256, block=64) at
seq lengths 256/512/1024/2048 — the realistic range of corpus + prefix
in the example.
Headline numbers (Ryzen 9 9950X, --features parallel,
--warm-up-time 1 --measurement-time 3 --sample-size 20):
seq dense sparse sparse+FG speedup (sparse vs dense)
256 2.41 ms 1.74 ms 2.23 ms 1.4x
512 9.59 ms 5.21 ms 6.24 ms 1.8x
1024 38.4 ms 12.2 ms 14.2 ms 3.1x
2048 154 ms 26.2 ms 30.3 ms 5.9x
Dense scales 4x per doubling (O(N²) confirmed). Sparse scales ~2–3x per
doubling (sub-quadratic), settling near 2x at larger N. The FastGRNN gate
adds overhead at every measured length in this single-head setup (about
28% at N=256, easing to about 16% at N=2048); it would pay back at
longer sequences and wider heads — iter 5 will sweep this.
Iter-plan progress:
✓ 1-3. corpus + retrieval LM + ASCII generation
✓ 4. sparse-mario bench ← here
5. fp16 KV cache + FastGRNN sweep + top-k sampling
6. validation + final summary
Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
2962c104e3
commit
03f8d08fd0
2 changed files with 115 additions and 0 deletions
|
|
@ -42,3 +42,7 @@ criterion = "0.5"
|
|||
[[bench]]
|
||||
name = "attention_bench"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "sparse_mario_bench"
|
||||
harness = false
|
||||
|
|
|
|||
111
crates/ruvllm_sparse_attention/benches/sparse_mario_bench.rs
Normal file
111
crates/ruvllm_sparse_attention/benches/sparse_mario_bench.rs
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
// sparse_mario_bench — benchmark the retrieval workload used by
|
||||
// `examples/sparse_mario.rs` against three attention paths:
|
||||
//
|
||||
// 1. dense_attention — O(N²) baseline
|
||||
// 2. sparse forward() — O(N log N) with non-causal window+log-stride+landmarks
|
||||
// 3. sparse forward_gated_with_fastgrnn() — near-linear with FastGRNN salience gate
|
||||
//
|
||||
// Tensor shape mirrors sparse-mario: heads=1, head_dim=64, non-causal,
|
||||
// window=256, block=64. Sequence lengths 256/512/1024/2048 lead up to the
|
||||
// realistic range of corpus_len + prefix_len in the example (2.1K–2.9K).
|
||||
//
|
||||
// Run with:
|
||||
// cargo bench --bench sparse_mario_bench --features parallel \
|
||||
// -- --warm-up-time 1 --measurement-time 3 --sample-size 20
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use ruvllm_sparse_attention::{
|
||||
dense_attention, AttentionBackend, FastGrnnGate, SparseAttentionConfig,
|
||||
SubquadraticSparseAttention, Tensor3,
|
||||
};
|
||||
|
||||
/// Per-head feature width used for every benchmark tensor and for the FastGRNN gate.
const HEAD_DIM: usize = 64;
|
||||
/// Number of attention heads in every benchmark tensor (sparse-mario is single-head).
const N_HEADS: usize = 1;
|
||||
/// Sequence lengths that each benchmark group is run at.
const SEQS: &[usize] = &[256, 512, 1024, 2048];
|
||||
|
||||
fn random_tensor(seq: usize, seed: u64) -> Tensor3 {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let len = seq * N_HEADS * HEAD_DIM;
|
||||
let data: Vec<f32> = (0..len).map(|_| rng.gen_range(-1.0f32..1.0f32)).collect();
|
||||
Tensor3::from_vec(data, seq, N_HEADS, HEAD_DIM).unwrap()
|
||||
}
|
||||
|
||||
fn mario_config() -> SparseAttentionConfig {
|
||||
SparseAttentionConfig {
|
||||
window: 256,
|
||||
block_size: 64,
|
||||
global_tokens: vec![0],
|
||||
causal: false,
|
||||
use_log_stride: true,
|
||||
use_landmarks: true,
|
||||
sort_candidates: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_dense(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("sparse_mario/dense");
|
||||
for &seq in SEQS {
|
||||
let q = random_tensor(seq, 1);
|
||||
let k = random_tensor(seq, 2);
|
||||
let v = random_tensor(seq, 3);
|
||||
group.bench_function(format!("seq_{}", seq), |b| {
|
||||
b.iter(|| {
|
||||
// sparse-mario uses non-causal attention; dense_attention's
|
||||
// last arg is the causal flag.
|
||||
dense_attention(black_box(&q), black_box(&k), black_box(&v), false).unwrap()
|
||||
})
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_sparse(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("sparse_mario/sparse");
|
||||
let attention = SubquadraticSparseAttention::new(mario_config()).unwrap();
|
||||
for &seq in SEQS {
|
||||
let q = random_tensor(seq, 4);
|
||||
let k = random_tensor(seq, 5);
|
||||
let v = random_tensor(seq, 6);
|
||||
group.bench_function(format!("seq_{}", seq), |b| {
|
||||
b.iter(|| {
|
||||
attention
|
||||
.forward(black_box(&q), black_box(&k), black_box(&v))
|
||||
.unwrap()
|
||||
})
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_sparse_fastgrnn(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("sparse_mario/sparse_fastgrnn");
|
||||
let attention = SubquadraticSparseAttention::new(mario_config()).unwrap();
|
||||
let gate = FastGrnnGate::new(HEAD_DIM, 32);
|
||||
|
||||
for &seq in SEQS {
|
||||
let q = random_tensor(seq, 7);
|
||||
let k = random_tensor(seq, 8);
|
||||
let v = random_tensor(seq, 9);
|
||||
// Keep top 25% of long-range candidates — FastGRNN drops the rest.
|
||||
let gate_top_k = (seq / 4).max(8);
|
||||
group.bench_function(format!("seq_{}", seq), |b| {
|
||||
b.iter(|| {
|
||||
attention
|
||||
.forward_gated_with_fastgrnn(
|
||||
black_box(&q),
|
||||
black_box(&k),
|
||||
black_box(&v),
|
||||
&gate,
|
||||
gate_top_k,
|
||||
)
|
||||
.unwrap()
|
||||
})
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Collect the three benchmark functions into a Criterion group named `benches`.
criterion_group!(benches, bench_dense, bench_sparse, bench_sparse_fastgrnn);
|
||||
// Generate the benchmark binary's `main`, which runs the `benches` group.
criterion_main!(benches);
|
||||
Loading…
Add table
Add a link
Reference in a new issue