feat(collections): PIAL Phase 0 — Miller-Rabin primality kernel + prime tables (#358)

feat(collections): PIAL Phase 0 — Miller-Rabin primality kernel + prime tables
This commit is contained in:
rUv 2026-04-20 14:28:43 -04:00 committed by GitHub
commit 855d8faec4
13 changed files with 2098 additions and 1 deletions

1
Cargo.lock generated
View file

@ -9124,6 +9124,7 @@ version = "2.2.0"
dependencies = [
"bincode 2.0.1",
"chrono",
"criterion 0.5.1",
"dashmap 6.1.0",
"parking_lot 0.12.5",
"ruvector-core 2.2.0",

View file

@ -7,6 +7,13 @@ authors.workspace = true
repository.workspace = true
readme = "README.md"
description = "High-performance collection management for Ruvector vector databases"
build = "build.rs"
[features]
default = []
# Opt-in probabilistic Miller-Rabin for u128 (PRD §5, ADR-151).
# WASM u128 codegen is ~5× slower than native; gate keeps it out of default bundles.
unstable-u128 = []
[dependencies]
ruvector-core = { version = "2.0.2", path = "../ruvector-core" }
@ -20,3 +27,9 @@ bincode = { workspace = true }
chrono = { workspace = true }
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "primality"
harness = false

View file

@ -0,0 +1,57 @@
//! Phase-0 benches for ADR-151 / PIAL.
//!
//! Targets (M-series):
//!
//! | bench | target |
//! |------------------------------------------|--------|
//! | `is_prime_u64` (worst case) | ≤ 50 ns |
//! | `prev_prime_below_pow2` (table fast path)| ≤ 1 ns |
//! | `next_prime_u64` (arbitrary) | ≤ 2 µs |
//! | `next_prime_u64` (2^61) | ≤ 12 µs |
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ruvector_collections::primality::{
is_prime_u64, next_prime_u64, prev_prime_below_pow2,
};
/// Benchmarks `is_prime_u64` on `u64::MAX - 58`, the largest `u64` prime.
///
/// A prime input is the slow case: the witness loop only exits early when a
/// witness proves compositeness, so a prime forces every round to run.
fn bench_is_prime_u64_worst_case(c: &mut Criterion) {
    const LARGEST_U64_PRIME: u64 = u64::MAX - 58;
    c.bench_function("is_prime_u64/worst_case_largest_u64_prime", |bencher| {
        bencher.iter(|| is_prime_u64(black_box(LARGEST_U64_PRIME)))
    });
}
/// Benchmarks the table lookup `prev_prime_below_pow2(32)`.
///
/// The exponent is passed through `black_box` so the lookup cannot be
/// constant-folded away.
fn bench_prev_prime_below_pow2_table(c: &mut Criterion) {
    let exponent: u32 = 32;
    c.bench_function("prev_prime_below_pow2/k=32_shard_router", |bencher| {
        bencher.iter(|| prev_prime_below_pow2(black_box(exponent)))
    });
}
/// Benchmarks `next_prime_u64` on an input near 1e9 that is not a power of
/// two, so the table shortcut is skipped and the Miller-Rabin walk runs.
fn bench_next_prime_u64_arbitrary(c: &mut Criterion) {
    const OFF_GRID_START: u64 = 1_000_003_777;
    c.bench_function("next_prime_u64/arbitrary_~1e9", |bencher| {
        bencher.iter(|| next_prime_u64(black_box(OFF_GRID_START)))
    });
}
/// Benchmarks `next_prime_u64` starting from `2^61 - 1`.
///
/// `2^61` itself is a power of two and would be answered from the table, so
/// the start is moved one below it to force the general Miller-Rabin walk.
fn bench_next_prime_u64_2_pow_61(c: &mut Criterion) {
    let just_below_pow2: u64 = (1u64 << 61) - 1;
    c.bench_function("next_prime_u64/2^61_minus_1_general_path", |bencher| {
        bencher.iter(|| next_prime_u64(black_box(just_below_pow2)))
    });
}
// Collects the four Phase-0 primality benchmarks into the
// `primality_benches` group and hands that group to `criterion_main!`,
// which supplies the benchmark binary's entry point (the bench target is
// declared with `harness = false`).
criterion_group!(
primality_benches,
bench_is_prime_u64_worst_case,
bench_prev_prime_below_pow2_table,
bench_next_prime_u64_arbitrary,
bench_next_prime_u64_2_pow_61
);
criterion_main!(primality_benches);

View file

@ -0,0 +1,73 @@
// build.rs — emits PRIMES_BELOW_2K[57] and PRIMES_ABOVE_2K[57] using the
// same Miller-Rabin kernel that ships at runtime. ADR-151 acceptance #2
// requires the table and the runtime to agree on every entry, and this is
// how we guarantee that — one source of truth, included from both sides.
use std::env;
use std::fs;
use std::path::PathBuf;
include!("src/primality_kernel.rs");
// Build-script entry point.
//
// Renders two 57-entry `u64` tables (exponents k = 8..=64, index `k - 8`)
// with the included Miller-Rabin kernel and writes them to
// `$OUT_DIR/prime_tables.rs`:
//   * PRIMES_BELOW_2K — largest prime strictly below 2^k
//   * PRIMES_ABOVE_2K — smallest prime strictly above 2^k (k = 64 is a 0 sentinel)
fn main() {
    println!("cargo:rerun-if-changed=src/primality_kernel.rs");
    println!("cargo:rerun-if-changed=build.rs");

    let mut generated = String::with_capacity(4096);
    generated.push_str(
        "// AUTO-GENERATED by build.rs from primality_kernel.rs.\n\
         // Do not edit by hand — regenerated on every build.\n\
         //\n\
         // Index: table[k - 8] holds the prime for exponent k, k in [8, 64].\n\n",
    );

    // ---- BELOW table -----------------------------------------------------
    generated.push_str(
        "/// Largest prime strictly less than 2^k for k in [8, 64], indexed by `k - 8`.\n\
         ///\n\
         /// Generated at build time from the same Miller-Rabin kernel that ships at runtime\n\
         /// (ADR-151 acceptance #2). Re-validated under `cargo test`.\n",
    );
    generated.push_str("pub const PRIMES_BELOW_2K: [u64; 57] = [\n");
    for k in 8u32..=64 {
        // 2^64 is not representable in u64, so `checked_shl` yields `None`
        // for k = 64. Searching below u64::MAX gives the same answer there
        // because u64::MAX itself is composite.
        let exclusive_upper = 1u64.checked_shl(k).unwrap_or(u64::MAX);
        let p = mr_prev_prime_u64(exclusive_upper);
        generated.push_str(&format!(" {p}, // largest prime < 2^{k}\n"));
    }
    generated.push_str("];\n\n");

    // ---- ABOVE table -----------------------------------------------------
    generated.push_str(
        "/// Smallest prime strictly greater than 2^k for k in [8, 64], indexed by `k - 8`.\n\
         ///\n\
         /// Entry at k = 64 is `0` (sentinel) — no u64 prime exists greater than 2^64.\n\
         /// Runtime callers must avoid that index.\n",
    );
    generated.push_str("pub const PRIMES_ABOVE_2K: [u64; 57] = [\n");
    for k in 8u32..=64 {
        // No u64 value exceeds 2^64, so the last slot carries a 0 sentinel
        // instead of a prime.
        let p = if k < 64 {
            mr_next_prime_u64(1u64 << k)
        } else {
            0u64
        };
        let row = match p {
            0 => format!(" 0, // sentinel: no u64 prime > 2^{k}\n"),
            _ => format!(" {p}, // smallest prime > 2^{k}\n"),
        };
        generated.push_str(&row);
    }
    generated.push_str("];\n");

    let destination =
        PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR not set")).join("prime_tables.rs");
    fs::write(&destination, generated).expect("failed to write prime_tables.rs");
}

View file

@ -1,6 +1,7 @@
//! # Ruvector Collections
//!
//! Multi-collection management with aliases for organizing vector databases.
//! Multi-collection management with aliases for organizing vector databases,
//! plus the workspace's shared primality utility (ADR-151 / PIAL).
//!
//! ## Features
//!
@ -9,6 +10,9 @@
//! - **Collection Statistics**: Track collection metrics
//! - **Thread-safe**: Concurrent access using DashMap
//! - **Persistence**: Store collections on disk
//! - **Primality**: Deterministic Miller-Rabin + tabled fast paths for prime
//! moduli used by ruvector-graph, micro-hnsw-wasm, sparsifier, attn-mincut,
//! and pi-brain (see [`primality`])
//!
//! ## Example
//!
@ -47,6 +51,7 @@
pub mod collection;
pub mod error;
pub mod manager;
pub mod primality;
pub use collection::{Collection, CollectionConfig, CollectionStats};
pub use error::{CollectionError, Result};

View file

@ -0,0 +1,316 @@
//! Deterministic Miller-Rabin primality plus tabled fast paths for the
//! power-of-two-aligned cases that dominate ruvector's hot paths.
//!
//! Designed for ADR-151 (PIAL — Prime-Indexed Acceleration Layer). Five
//! consumers (shard router, HNSW buckets, sparsifier strides, mincut LSH,
//! pi-brain witness chain) get one shared utility and zero new external
//! dependencies.
//!
//! # Determinism
//!
//! | Range | Witnesses | Result |
//! |-------|-----------|--------|
//! | `n < 2^32` | `{2, 7, 61}` (Pomerance/Selfridge/Wagstaff) would suffice; the current code reuses the 12-witness `u64` path | Deterministic |
//! | `n < 2^64` | `{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` (Sinclair, 2011) | Deterministic |
//! | `n < 2^128` | 40 random rounds (`unstable-u128` feature) | `Pr[err] < 2⁻⁸⁰` |
//!
//! Pinned-pseudoprime regressions in `tests/primality_pseudoprimes.rs`
//! protect the deterministic ranges from witness-set "optimizations".
//!
//! # Hot vs cold paths
//!
//! Three of PIAL's five sites request primes near *fixed* power-of-two
//! sizes. Those calls hit [`prev_prime_below_pow2`] / [`next_prime_above_pow2`]
//! — a single L1-cached load, ~1 ns. The two unpredictable sites (LSH
//! universe, witness ephemeral primes) use the general MR descent at
//! ~250 ns. Both are cold.
//!
//! Crucially the table is generated at build time by `build.rs` from the same
//! `primality_kernel.rs` that backs [`is_prime_u64`], so MR remains the source of truth.
// Pull in the deterministic Miller-Rabin kernel that build.rs also uses.
// Same code, same answers — that's the whole point.
include!("primality_kernel.rs");
// Pull in the build-time-generated tables (PRIMES_BELOW_2K, PRIMES_ABOVE_2K).
include!(concat!(env!("OUT_DIR"), "/prime_tables.rs"));
/// Returns `true` iff `n` is prime. Deterministic for all `u32`.
///
/// Thin public wrapper around the shared kernel's `mr_is_prime_u32`, which
/// widens `n` to `u64` and runs the same 12-witness test as
/// [`is_prime_u64`]. The smaller `{2, 7, 61}` witness set
/// (Pomerance/Selfridge/Wagstaff) is not applied separately on this path.
#[inline]
pub fn is_prime_u32(n: u32) -> bool {
mr_is_prime_u32(n)
}
/// Returns `true` iff `n` is prime. Deterministic for all `u64`.
///
/// Uses Sinclair's 2011 witness set
/// `{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` — known to be sufficient
/// for the entire `u64` range. Allocation-free.
///
/// Thin public wrapper: all of the work happens in the shared kernel's
/// `mr_is_prime_u64`, the same function `build.rs` includes to generate the
/// prime tables.
#[inline]
pub fn is_prime_u64(n: u64) -> bool {
mr_is_prime_u64(n)
}
/// Largest prime strictly less than `2^k`, for `k ∈ [8, 64]`.
///
/// Single L1-cached table load (~1 ns). Use this whenever the caller knows
/// the size is a power of two — shard routers, HNSW bucket sizing,
/// sparsifier strides.
///
/// The value is read from `PRIMES_BELOW_2K[k - 8]`, the 57-entry table that
/// `build.rs` generates.
///
/// # Panics
///
/// Debug builds assert `8 <= k <= 64` with a descriptive message. When debug
/// assertions are disabled an out-of-range `k` still panics rather than
/// returning a wrong entry: `k > 64` indexes past the end of the table, and
/// `k < 8` underflows `k - 8` (either an overflow panic or a wrapped,
/// out-of-bounds index).
#[inline]
pub fn prev_prime_below_pow2(k: u32) -> u64 {
debug_assert!((8..=64).contains(&k), "k out of table range [8, 64]");
PRIMES_BELOW_2K[(k - 8) as usize]
}
/// Smallest prime strictly greater than `2^k`, for `k ∈ [8, 63]`.
///
/// Symmetric companion to [`prev_prime_below_pow2`]. The value is read from
/// `PRIMES_ABOVE_2K[k - 8]`. The `k = 64` entry of that table is a `0`
/// sentinel (no `u64` prime exists greater than `2^64`) and is never
/// returned by this function.
///
/// # Panics
///
/// Panics in every build profile if `k` is outside `[8, 63]`. The check is
/// a hard `assert!` rather than a `debug_assert!` because, with debug
/// assertions disabled, `k = 64` would otherwise pass the bounds check and
/// silently return the sentinel `0` as if it were a prime.
#[inline]
pub fn next_prime_above_pow2(k: u32) -> u64 {
    assert!(
        (8..=63).contains(&k),
        "k out of table range [8, 63]; PRIMES_ABOVE_2K[64] is a sentinel"
    );
    PRIMES_ABOVE_2K[(k - 8) as usize]
}
/// Largest prime strictly less than `n`, or `0` when no such prime exists
/// (`n <= 2`).
///
/// When `n` is an exact power of two `2^k` with `k >= 8`, the answer is read
/// from `PRIMES_BELOW_2K`. Every other input is handled by the kernel's
/// downward Miller-Rabin walk. The largest power of two a `u64` can hold is
/// `2^63`, so the table's `k = 64` slot is never reached from here.
#[inline]
pub fn prev_prime_u64(n: u64) -> u64 {
    let exponent = n.trailing_zeros();
    let answered_by_table = n.is_power_of_two() && (8..=64).contains(&exponent);
    if answered_by_table {
        PRIMES_BELOW_2K[(exponent - 8) as usize]
    } else {
        mr_prev_prime_u64(n)
    }
}
/// Smallest prime strictly greater than `n`, or `0` when `n` is at or above
/// the largest `u64` prime (`u64::MAX - 58`).
///
/// When `n` is an exact power of two `2^k` with `k ∈ [8, 63]`, the answer is
/// read from `PRIMES_ABOVE_2K`. Every other input is handled by the kernel's
/// upward Miller-Rabin walk.
#[inline]
pub fn next_prime_u64(n: u64) -> u64 {
    let exponent = n.trailing_zeros();
    let answered_by_table = n.is_power_of_two() && (8..=63).contains(&exponent);
    if answered_by_table {
        PRIMES_ABOVE_2K[(exponent - 8) as usize]
    } else {
        mr_next_prime_u64(n)
    }
}
/// Maps `seed` to a prime, deterministically, for the pi-brain witness chain
/// (ADR-151 §4.4).
///
/// The seed is forced odd and truncated to its low 61 bits. If that
/// candidate is already prime it is returned as is; otherwise the next prime
/// above it is returned. Equal seeds always yield equal primes.
#[inline]
pub fn ephemeral_prime(seed: u64) -> u64 {
    const LOW_61_BITS: u64 = (1u64 << 61) - 1;
    let candidate = (seed | 1) & LOW_61_BITS;
    if mr_is_prime_u64(candidate) {
        return candidate;
    }
    // The candidate is below 2^61, far under the largest u64 prime, so the
    // upward walk always finds a prime and never reports the 0 sentinel.
    mr_next_prime_u64(candidate)
}
// ── Probabilistic u128 mode (opt-in) ─────────────────────────────────────
/// Probabilistic Miller-Rabin primality test for `u128`.
///
/// * `n` — value to test.
/// * `rounds` — number of pseudo-random witnesses tried when `n` does not fit
///   in `u64`. Soundness error is `< 4^-rounds`; `rounds = 40` gives
///   `< 2⁻⁸⁰`, adequate for hashing but **not** for cryptographic prime
///   generation (see ADR-151 "Security Considerations").
///
/// Values that fit in `u64` are decided by the deterministic `u64` kernel
/// and ignore `rounds`. Witnesses come from a small LCG seeded from `n`
/// itself, so repeated calls with the same arguments give the same answer.
///
/// Gated behind the `unstable-u128` feature: WASM `u128` codegen is ~5×
/// slower than native, so it is kept out of default bundles.
#[cfg(feature = "unstable-u128")]
pub fn is_prime_u128(n: u128, rounds: u8) -> bool {
    // First twelve primes: used as a trial-division screen.
    const SCREEN: [u128; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
    // LCG step constants (the pair commonly attributed to Knuth's MMIX
    // generator). Only rough uniformity is needed here, not cryptographic
    // quality.
    const LCG_MULTIPLIER: u128 = 6364136223846793005;
    const LCG_INCREMENT: u128 = 1442695040888963407;
    const SEED_MIX: u128 = 0x9E37_79B9_7F4A_7C15_F39C_C060_5CED_C835u128;

    if n < 2 {
        return false;
    }
    for p in SCREEN.iter().copied() {
        if n == p {
            return true;
        }
        if n % p == 0 {
            return false;
        }
    }

    // Anything representable as u64 gets the deterministic answer.
    if n <= u64::MAX as u128 {
        return mr_is_prime_u64(n as u64);
    }

    // From here n > u64::MAX and n is odd. Write n - 1 = d * 2^s with d odd.
    let n_minus_1 = n - 1;
    let s = n_minus_1.trailing_zeros();
    let d = n_minus_1 >> s;

    let mut state: u128 = n ^ SEED_MIX;
    for _ in 0..rounds {
        state = state.wrapping_mul(LCG_MULTIPLIER).wrapping_add(LCG_INCREMENT);
        // state % (n - 3) lies in [0, n - 4], so the witness lies in [2, n - 2].
        let witness = 2u128 + (state % (n - 3));
        if mr_is_composite_u128(n, d, s, witness) {
            return false;
        }
    }
    true
}
// Returns true iff `a` proves `n` composite in one Miller-Rabin round.
// Expects n - 1 = d * 2^s, already decomposed by the caller.
#[cfg(feature = "unstable-u128")]
#[inline]
fn mr_is_composite_u128(n: u128, d: u128, s: u32, a: u128) -> bool {
    let minus_one = n - 1;
    let mut x = powmod_u128(a, d, n);
    if x == 1 || x == minus_one {
        return false;
    }
    // Up to s - 1 further squarings; reaching n - 1 at any point means `a`
    // is not a witness. `1..s` is empty when s is 0 or 1.
    let reached_minus_one = (1..s).any(|_| {
        x = mulmod_u128(x, x, n);
        x == minus_one
    });
    !reached_minus_one
}
// Computes base^exp mod m by square-and-multiply, using `mulmod_u128` for
// every product. Returns 0 when m == 1.
#[cfg(feature = "unstable-u128")]
#[inline]
fn powmod_u128(mut base: u128, mut exp: u128, m: u128) -> u128 {
    if m == 1 {
        return 0;
    }
    let mut result: u128 = 1 % m;
    base %= m;
    while exp != 0 {
        let low_bit_set = exp % 2 == 1;
        exp /= 2;
        if low_bit_set {
            result = mulmod_u128(result, base, m);
        }
        // Skip the final squaring: its result would never be used.
        if exp != 0 {
            base = mulmod_u128(base, base, m);
        }
    }
    result
}
// Computes (a * b) mod m by double-and-add, so no 256-bit intermediate is
// needed: one modular doubling per bit of `b`, plus a modular add for every
// set bit.
#[cfg(feature = "unstable-u128")]
#[inline]
fn mulmod_u128(a: u128, b: u128, m: u128) -> u128 {
    // Number of significant bits in b; zero when b == 0.
    let significant_bits = u128::BITS - b.leading_zeros();
    let mut addend = a % m;
    let mut product: u128 = 0;
    for bit in 0..significant_bits {
        if (b >> bit) & 1 == 1 {
            product = mod_add_u128(product, addend, m);
        }
        addend = mod_add_u128(addend, addend, m);
    }
    product
}
#[cfg(feature = "unstable-u128")]
#[inline]
// Computes (a + b) mod m for a < m and b < m, including moduli above 2^127
// where a + b can exceed u128::MAX.
fn mod_add_u128(a: u128, b: u128, m: u128) -> u128 {
    let (sum, carried) = a.overflowing_add(b);
    // A carry means the true sum is >= 2^128 > m. One wrapping subtraction of
    // m brings either case back into [0, m).
    if carried || sum >= m {
        sum.wrapping_sub(m)
    } else {
        sum
    }
}
// ── Internal sanity tests (run with the rest of the crate's unit tests) ──
// Unit tests for the public wrappers in this module: primality of small and
// edge-case values, three pinned table entries, the optional u128 path, and
// `ephemeral_prime`.
#[cfg(test)]
mod tests {
use super::*;
// Exhaustively compares `is_prime_u64` against the known primes below 100.
#[test]
fn small_primes_under_100() {
let known: [u64; 25] = [
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79,
83, 89, 97,
];
for n in 0u64..100 {
assert_eq!(is_prime_u64(n), known.contains(&n), "is_prime_u64({n})");
}
}
// Boundary values: 0, 1, u64::MAX (composite) and the largest u64 prime.
#[test]
fn edges() {
assert!(!is_prime_u64(0));
assert!(!is_prime_u64(1));
assert!(!is_prime_u64(u64::MAX));
assert!(is_prime_u64(u64::MAX - 58), "largest u64 prime");
}
// Pins three entries of the generated BELOW table against known values.
#[test]
fn table_index_round_trip() {
// The most heavily-used shard-router entry.
assert_eq!(prev_prime_below_pow2(32), 4_294_967_291);
// Smallest table entry.
assert_eq!(prev_prime_below_pow2(8), 251);
// Largest table entry.
assert_eq!(prev_prime_below_pow2(64), u64::MAX - 58);
}
// Only compiled when the `unstable-u128` feature is enabled.
#[cfg(feature = "unstable-u128")]
#[test]
fn u128_probabilistic_smoke() {
use super::is_prime_u128;
// Defers to deterministic u64 path for n <= u64::MAX.
assert!(is_prime_u128(7, 40));
assert!(!is_prime_u128(9, 40));
assert!(is_prime_u128(u64::MAX as u128 - 58, 40));
// True u128 path: 2^89 - 1 is a Mersenne prime.
let m89: u128 = (1u128 << 89) - 1;
assert!(is_prime_u128(m89, 40), "M_89 = 2^89 - 1 is prime");
// Composite just above 2^64. It is rejected by the small-prime screen
// (divisible by 3), so this does not exercise the random-witness rounds.
let composite: u128 = (1u128 << 65) + 1; // = 3 * 11 * 131 * ... (divisible by 3)
assert!(!is_prime_u128(composite, 40));
}
// Checks that every sampled seed maps to a prime that stays below 2^62.
#[test]
fn ephemeral_prime_is_prime_for_assorted_seeds() {
for seed in [0u64, 1, 42, 0xDEAD_BEEF, u64::MAX, 1_000_003] {
let p = ephemeral_prime(seed);
assert!(is_prime_u64(p), "ephemeral_prime({seed}) = {p} not prime");
// Loose upper bound: largest known prime gap below 2^64 is well under 2^31,
// so anything below 2^62 means the walk stayed near its 2^61 starting window.
assert!(p < (1u64 << 62), "ephemeral_prime overshot expected window");
}
}
}

View file

@ -0,0 +1,162 @@
// Deterministic Miller-Rabin kernel — ADR-151 (PIAL).
//
// `include!`d into two contexts (build.rs and src/primality.rs) which use
// different subsets of the symbols. Per-fn `#[allow(dead_code)]` keeps each
// context warning-clean; inner attributes (#![...]) aren't legal in
// included files.
//
// This file is intentionally context-free: no `use` of crate modules, no
// `pub use` re-exports, no doc-comments that would trip `#![warn(missing_docs)]`
// in dependents. It is `include!`d from BOTH `src/primality.rs` AND `build.rs`
// so the table generator and the runtime share one source of truth.
//
// Witness sets:
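// u32: {2, 7, 61} Pomerance/Selfridge/Wagstaff (sufficient for u32, but mr_is_prime_u32 currently delegates to the u64 path and runs all 12 witnesses)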
// u64: {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37} Sinclair (2011)
//
// Both are deterministic over their full ranges. Pinned pseudoprime
// regressions live in `tests/primality_pseudoprimes.rs`.
// Computes (a * b) mod m. The product of two u64 values always fits in
// u128, so the multiplication is done there and cannot overflow.
#[inline]
#[allow(dead_code)]
fn mr_mulmod_u64(a: u64, b: u64, m: u64) -> u64 {
    let wide_product = u128::from(a) * u128::from(b);
    let reduced = wide_product % u128::from(m);
    // reduced < m <= u64::MAX, so the narrowing cast is lossless.
    reduced as u64
}
// Computes base^exp mod m by square-and-multiply on top of `mr_mulmod_u64`.
// Returns 0 when m == 1.
#[inline]
#[allow(dead_code)]
fn mr_powmod_u64(mut base: u64, mut exp: u64, m: u64) -> u64 {
    if m == 1 {
        return 0;
    }
    let mut result: u64 = 1;
    base %= m;
    while exp != 0 {
        let low_bit_set = exp % 2 == 1;
        exp /= 2;
        if low_bit_set {
            result = mr_mulmod_u64(result, base, m);
        }
        // Skip the final squaring: its result would never be used.
        if exp != 0 {
            base = mr_mulmod_u64(base, base, m);
        }
    }
    result
}
// One Miller-Rabin round: returns true iff base `a` proves `n` composite.
// Preconditions (caller's responsibility): n odd, n > 3, a in [2, n-2], and
// n - 1 = d * 2^s with d odd, passed in already decomposed.
#[inline]
#[allow(dead_code)]
fn mr_is_composite_witness(n: u64, d: u64, s: u32, a: u64) -> bool {
    let minus_one = n - 1;
    let mut x = mr_powmod_u64(a, d, n);
    if x == 1 || x == minus_one {
        return false;
    }
    // Up to s - 1 further squarings; reaching n - 1 at any point means `a`
    // is not a witness. `1..s` is empty when s is 0 or 1.
    let reached_minus_one = (1..s).any(|_| {
        x = mr_mulmod_u64(x, x, n);
        x == minus_one
    });
    !reached_minus_one
}
// Deterministic primality test for u64.
//
// Step 1 rejects n < 2. Step 2 trial-divides by the first twelve primes,
// which also settles every n up to 37. Step 3 runs one Miller-Rabin round
// per witness, using those same twelve primes as bases.
#[inline]
#[allow(dead_code)]
fn mr_is_prime_u64(n: u64) -> bool {
    const WITNESSES: [u64; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];

    if n < 2 {
        return false;
    }
    for p in WITNESSES.iter().copied() {
        if n == p {
            return true;
        }
        if n % p == 0 {
            return false;
        }
    }

    // Here n is odd, greater than 37 and coprime to every witness, so each
    // witness is a valid base in [2, n-2]. Write n - 1 = d * 2^s with d odd.
    let n_minus_1 = n - 1;
    let s = n_minus_1.trailing_zeros();
    let d = n_minus_1 >> s;

    WITNESSES
        .iter()
        .all(|&a| !mr_is_composite_witness(n, d, s, a))
}
// u32 entry point: widens `n` to u64 and delegates to `mr_is_prime_u64`.
#[inline]
#[allow(dead_code)]
fn mr_is_prime_u32(n: u32) -> bool {
// Witnesses {2, 7, 61} are sufficient for all u32, but they are not applied
// separately here: the widened value goes through the u64 implementation,
// which screens small primes and then runs all twelve of its witnesses.
mr_is_prime_u64(n as u64)
}
// Largest prime strictly below `upper`, or 0 when there is none
// (upper <= 2). Shared by build.rs and the runtime `prev_prime_u64`
// fallback path.
#[inline]
#[allow(dead_code)]
fn mr_prev_prime_u64(upper: u64) -> u64 {
    match upper {
        0..=2 => return 0,
        3 => return 2,
        _ => {}
    }
    // Start at the largest odd value below `upper` and step down by two.
    let below = upper - 1;
    let mut candidate = if below % 2 == 0 { below - 1 } else { below };
    while !mr_is_prime_u64(candidate) {
        // Safety net so the subtraction below can never underflow.
        if candidate <= 3 {
            return 2;
        }
        candidate -= 2;
    }
    candidate
}
// Smallest prime strictly above `lower`, or 0 when `lower` is already at or
// beyond the largest u64 prime (u64::MAX - 58).
#[inline]
#[allow(dead_code)]
fn mr_next_prime_u64(lower: u64) -> u64 {
    const LARGEST_U64_PRIME: u64 = u64::MAX - 58;
    match lower {
        0 | 1 => 2,
        2 => 3,
        _ if lower >= LARGEST_U64_PRIME => 0,
        _ => {
            // First odd value above `lower`; `| 1` bumps an even successor.
            let mut candidate = (lower + 1) | 1;
            // Terminates without overflow: a prime exists in
            // (lower, LARGEST_U64_PRIME], as the guard above established.
            while !mr_is_prime_u64(candidate) {
                candidate += 2;
            }
            candidate
        }
    }
}

View file

@ -0,0 +1,84 @@
//! Pinned pseudoprime regressions for the deterministic Miller-Rabin path.
//!
//! These exist so any future "optimization" that shrinks the Sinclair-12
//! witness set fails CI immediately. Numbers come from OEIS A014233
//! (smallest strong pseudoprimes to the first n primes).
use ruvector_collections::primality::{is_prime_u32, is_prime_u64};
// Pinned strong pseudoprimes ("spsp"). Each constant below is asserted to be
// composite by one of the `detects_strong_pseudoprime_*` tests in this file.
/// OEIS A014233(4): smallest spsp to bases {2, 3, 5, 7}. Detected by base 11.
const SPP_2357: u64 = 3_215_031_751;
/// OEIS A014233(5): smallest spsp to bases {2, 3, 5, 7, 11}. Detected by base 13.
const SPP_235711: u64 = 2_152_302_898_747;
/// OEIS A014233(11): smallest spsp to first 11 primes (through 31).
/// Detected ONLY by the 12th Sinclair witness, base 37 — the canary that
/// catches anyone shrinking the witness set.
const SPP_FIRST_11: u64 = 3_825_123_056_546_413_051;
/// `SPP_2357` passes bases 2, 3, 5 and 7, so it must still be reported composite.
#[test]
fn detects_strong_pseudoprime_2357() {
    let reported_prime = is_prime_u64(SPP_2357);
    assert!(!reported_prime, "{SPP_2357} is composite (detected by base 11)");
}
/// `SPP_235711` passes bases 2 through 11, so it must still be reported composite.
#[test]
fn detects_strong_pseudoprime_235711() {
    let reported_prime = is_prime_u64(SPP_235711);
    assert!(!reported_prime, "{SPP_235711} is composite (detected by base 13)");
}
/// `SPP_FIRST_11` passes the first eleven prime bases; only base 37 exposes it.
#[test]
fn detects_strong_pseudoprime_first_11_primes() {
    let reported_prime = is_prime_u64(SPP_FIRST_11);
    assert!(
        !reported_prime,
        "{SPP_FIRST_11} is composite — detection requires base 37 (Sinclair's last witness)"
    );
}
/// Compares `is_prime_u64` with the known primes for every `n` in `0..=100`.
#[test]
fn small_prime_sanity_under_100() {
    // Sorted ascending, which is what makes the binary search below valid.
    const PRIMES_UNDER_100: [u64; 25] = [
        2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
        89, 97,
    ];
    for n in 0u64..=100 {
        let expected = PRIMES_UNDER_100.binary_search(&n).is_ok();
        assert_eq!(is_prime_u64(n), expected, "is_prime_u64({n})");
    }
}
/// Boundary values at both ends of the `u64` and `u32` ranges.
#[test]
fn edge_cases() {
    // Bottom of the range: neither 0 nor 1 is prime.
    for n in [0u64, 1] {
        assert!(!is_prime_u64(n));
    }
    // Top of the u64 range.
    let largest_u64_prime = u64::MAX - 58;
    assert!(!is_prime_u64(u64::MAX), "u64::MAX (= 2^64 - 1) factors");
    assert!(is_prime_u64(largest_u64_prime), "largest u64 prime: u64::MAX - 58");
    // Top of the u32 range: the largest u32 prime is 2^32 - 5.
    let largest_u32_prime: u32 = 4_294_967_291;
    assert!(is_prime_u32(largest_u32_prime), "largest u32 prime");
    assert!(!is_prime_u32(u32::MAX));
}
/// Every listed Mersenne prime (`2^p - 1`) must be reported prime.
#[test]
fn assorted_known_primes() {
    let known_primes: [u64; 7] = [
        7,                             // 2^3 - 1
        127,                           // 2^7 - 1
        8191,                          // 2^13 - 1
        131_071,                       // 2^17 - 1
        524_287,                       // 2^19 - 1
        2_147_483_647,                 // 2^31 - 1
        2_305_843_009_213_693_951u64, // 2^61 - 1
    ];
    for p in known_primes.iter().copied() {
        assert!(is_prime_u64(p), "{p} is a known prime");
    }
}
/// Carmichael numbers fool the textbook Fermat test, so they are pinned here
/// even though they are not strong pseudoprimes.
#[test]
fn assorted_known_composites() {
    let carmichael_numbers = [561u64, 1105, 1729, 2465, 2821, 6601, 8911];
    for n in carmichael_numbers.iter().copied() {
        assert!(!is_prime_u64(n), "{n} is a Carmichael number, composite");
    }
}

View file

@ -0,0 +1,99 @@
//! Acceptance criterion #2 of ADR-151: every entry of `PRIMES_BELOW_2K` and
//! `PRIMES_ABOVE_2K` must agree with the runtime Miller-Rabin descent.
//!
//! For each `k ∈ [8, 64]` (BELOW) / `[8, 63]` (ABOVE) we re-run MR on the
//! tabled prime, then sweep every odd integer in the gap to `2^k` and
//! assert no other prime hides there. This is what makes MR — not the
//! table — the source of truth.
use ruvector_collections::primality::{
is_prime_u64, PRIMES_ABOVE_2K, PRIMES_BELOW_2K,
};
/// Calls `f` on every odd `n` with `lo < n < hi`, in ascending order.
///
/// All stepping uses checked arithmetic, so bounds at or near `u64::MAX`
/// end the sweep instead of overflowing. Used to confirm that the gap
/// between a tabled prime and its power of two holds no other prime.
fn sweep_odds_strictly_between<F: FnMut(u64)>(lo: u64, hi: u64, f: F) {
    let Some(after_lo) = lo.checked_add(1) else {
        return;
    };
    // Only an even value is changed by `| 1`, and an even value is never
    // u64::MAX, so rounding up to the next odd number cannot overflow.
    let first_odd = after_lo | 1;
    std::iter::successors(Some(first_odd), |n| n.checked_add(2))
        .take_while(|&n| n < hi)
        .for_each(f);
}
/// For every `k` in `8..=64`, checks that `PRIMES_BELOW_2K[k - 8]` is prime
/// and that no odd value between it and `2^k` is prime.
#[test]
fn primality_below_table_cross_check() {
    for k in 8u32..=64 {
        let p = PRIMES_BELOW_2K[(k - 8) as usize];
        assert!(
            is_prime_u64(p),
            "PRIMES_BELOW_2K[k={k}] = {p} not prime per Miller-Rabin"
        );
        // `checked_shl` yields `None` only for k = 64, where 2^64 does not
        // fit in u64.
        match 1u64.checked_shl(k) {
            Some(hi) => {
                sweep_odds_strictly_between(p, hi, |m| {
                    assert!(
                        !is_prime_u64(m),
                        "found prime {m} in (PRIMES_BELOW_2K[k={k}] = {p}, 2^{k} = {hi})"
                    );
                });
            }
            None => {
                // The sweep treats u64::MAX as an exclusive bound, so
                // u64::MAX itself is checked separately afterwards.
                sweep_odds_strictly_between(p, u64::MAX, |m| {
                    assert!(
                        !is_prime_u64(m),
                        "found prime {m} > PRIMES_BELOW_2K[64] = {p} (within u64)"
                    );
                });
                assert!(!is_prime_u64(u64::MAX), "u64::MAX is composite");
            }
        }
    }
}
/// For every `k` in `8..=63`, checks that `PRIMES_ABOVE_2K[k - 8]` is prime
/// and that no odd value between `2^k` and it is prime. Also checks that the
/// `k = 64` slot still holds the `0` sentinel.
#[test]
fn primality_above_table_cross_check() {
    // The final slot (k = 64) is a sentinel, not a prime, and is checked last.
    let sentinel_slot = (64 - 8) as usize;

    for k in 8u32..=63 {
        let lo = 1u64 << k;
        let p = PRIMES_ABOVE_2K[(k - 8) as usize];
        assert!(
            is_prime_u64(p),
            "PRIMES_ABOVE_2K[k={k}] = {p} not prime per Miller-Rabin"
        );
        sweep_odds_strictly_between(lo, p, |m| {
            assert!(
                !is_prime_u64(m),
                "found prime {m} in (2^{k} = {lo}, PRIMES_ABOVE_2K[k={k}] = {p})"
            );
        });
    }

    // A non-zero value here would claim a u64 prime above 2^64, which cannot exist.
    assert_eq!(
        PRIMES_ABOVE_2K[sentinel_slot],
        0,
        "PRIMES_ABOVE_2K[64] must be the sentinel 0 — there is no u64 prime > 2^64"
    );
}

View file

@ -0,0 +1,381 @@
# ADR-151: Miller-Rabin–Driven Prime Optimizations (PIAL)
## Status
Accepted (Phase 0 landed 2026-04-16; performance targets revised — see "Phase 0 Findings" below)
## Date
2026-04-16
## Authors
ruv.io · RuVector Architecture
## Relates To
- **PRD**: `docs/research/miller-rabin-optimizations/PRD.md`
- ADR-027 — HNSW parameterized query fix
- ADR-038 — npx-ruvector / RVLite witness integration
- ADR-058 — RVF hash security & optimization (finding #6)
- ADR-148 — Brain hypothesis engine
- ADR-149 — Brain performance optimizations
- ADR-150 — π-brain + RuvLtra via Tailscale
## Tier (per ADR-026)
- **Core utility**: Tier-1 (Agent Booster eligible — pure WASM transform)
- **Integration patches**: Tier-2 (Haiku-cost simple edits)
---
## Context
Five independent subsystems in ruvector default to **power-of-two moduli** for
hashing, sharding, sketching, and adjacency storage. Each has a documented or
empirically observed pathology:
1. **ruvector-graph shard router** (ADR-058 finding #6, P3): `xxh3_64() mod
2^k` produces ~50% birthday collisions at 2³² nodes and biases under
Zipfian keys.
2. **micro-hnsw-wasm / hyperbolic-hnsw adjacency**: open-addressed tables
sized to `2^k` cluster on near-duplicate vectors (timestamps, sensor
streams), inflating p99 insert latency.
3. **ruvector-sparsifier stride sampler**: power-of-two strides alias on
grid-structured graphs (images, meshes, lattices) — well-known LCG-era
problem with a well-known fix.
4. **ruvector-attn-mincut LSH families**: `((a·x+b) mod p) mod m` requires
`p` to be prime and `> universe`; today's hand-picked Mersenne constants
silently degrade past their bounds.
5. **pi-brain witness chain** (ADR-038): single-hash (XXH3) tamper-evidence
with no per-share entropy.
A grep across all crates confirms **zero existing primality-testing code** in
ruvector. The `prime-radiant` crate's name is metaphorical (coherence-gate)
and unrelated. There is no infrastructure to build on, but the surface area
is small enough that a single utility module unlocks all five consumers.
We need a primality test that is:
- **Deterministic** for `u64` (the size used by every consumer above).
- **Allocation-free** (hot paths in `no_std` and WASM contexts).
- **Constant-time-ish** for cryptographic-flavored use (witness chain).
- **Cheap enough** to call mid-resharding without operator coordination.
**Miller-Rabin** with the Sinclair (2011) witness set
`{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` satisfies all of these for
`u64`. For `u32`, the Pomerance/Selfridge/Wagstaff set `{2, 7, 61}` is
sufficient. For `u128` (an opt-in mode for future BFV-flavored work),
probabilistic Miller-Rabin with `k = 40` rounds gives a soundness error of
`< 2^-80` — adequate for hashing and far below cryptographic thresholds.
## Decision
We will introduce a single new module — `crates/ruvector-collections/src/primality.rs` —
exposing a deterministic Miller-Rabin primality test plus `next_prime` /
`prev_prime` helpers, and we will wire it into five consumer subsystems
**incrementally, behind feature flags**, in the order described in the PRD's
Rollout Plan.
We deliberately reject every alternative that fragments the workspace
further (new crate, external dependency on `glass_pumpkin` / `num-prime`,
or duplicating logic across `micro-hnsw-wasm` and `ruvector-graph`).
### Architecture Summary
```
┌──────────────────────────────────────────────────────────────┐
│ ruvector-collections::primality (NEW, ~250 LoC, no_std) │
│ │
│ is_prime_u32 / is_prime_u64 / is_prime_u128 │
│ next_prime_u64 / prev_prime_u64 │
│ ephemeral_prime(seed) ← π-brain witness only │
└────────┬──────────────┬──────────────┬──────────────┬─────────┘
▼ ▼ ▼ ▼
shard router HNSW buckets LSH families witness chain
(P1) (P2) (P3, P4, P5) (P6, opt-in)
```
### What We Already Have
| Component | Location | Status |
|-------------------------------------|---------------------------------------------|---------------|
| Workspace utility crate | `crates/ruvector-collections` | Established |
| Lemire `fastmod` | already vendored in tree | Reusable |
| HNSW adjacency abstraction | `crates/micro-hnsw-wasm` | Existing |
| Shard router using XXH3-64 | `crates/ruvector-graph/src/distributed/` | ADR-058 #6 |
| Pi-brain witness payload | `crates/mcp-brain-server` | XXH3 only |
| Sparsifier samplers | `crates/ruvector-sparsifier/src/sampler.rs` | Power-of-2 |
| LSH sketch (mincut attention) | `crates/ruvector-attn-mincut` | Hand-picked p |
### What We Will Build
| Item | Owner | Phase |
|---------------------------------------------------------|--------------|-------|
| `primality.rs` + benches + property tests | core | 0 |
| `PRIMES_BELOW_2K` / `PRIMES_ABOVE_2K` tables + `build.rs` regen + CI cross-check vs MR | core | 0 |
| Shard-router `--feature prime-shard` switch (uses table fast path) | distributed | 1 |
| HNSW prime-bucket capacity strategy (uses table fast path) | hnsw | 2 |
| Certified-prime LSH modulus (`p = next_prime(universe)`, general MR path) | sketches | 3 |
| Witness-chain `Option<EphemeralPrimeFingerprint>` field (general MR path) | brain | 4 |
| Optional: prime-cardinality PQ codebooks | cnn / quant | 5 |
### Generation Strategy: Table Fast Path + Miller-Rabin Fallback
Three of the five integration sites (shard router, HNSW buckets,
sparsifier strides) request primes near **fixed power-of-two sizes**
that never change between releases. For these we ship a static table
of "largest prime < 2^k" for k ∈ [8, 64] (~456 bytes, ~1 KB combined
with the symmetric `_ABOVE_` table) and route those calls to a single
L1-cached load — **zero Miller-Rabin work at runtime**.
The two unpredictable sites (LSH universe, witness ephemeral primes)
fall through to the general Miller-Rabin descent path at ~250 ns per
call. Both are cold paths (index-build time and per-share, respectively).
Crucially, **Miller-Rabin remains the source of truth.** The tables are
generated by a `build.rs` script that calls the MR implementation, and
a `#[test]` re-validates every entry under `cargo test`. The table is
an *amortization* of MR to compile time, not a replacement for it.
This refinement keeps the proposal's runtime cost honest: PIAL adds
≤ 1 ns to the hottest paths (shard routing, HNSW probe sequences) and
~250 ns to the coldest paths (one-shot index build, per-share fingerprint).
### Determinism Guarantees
| Range | Witnesses | Result |
|--------------|---------------------------------------------------|-----------------|
| `n < 2^32` | 2, 7, 61 | Deterministic |
| `n < 2^64` | 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37 | Deterministic |
| `n < 2^128` | 40 random rounds | Pr[err] < 2⁻⁸⁰ |
Tests will pin every documented "hard" pseudoprime (e.g. 3215031751,
2152302898747) so the deterministic guarantee is regression-protected.
### Hot-Path Avoidance
Modulo-by-prime is a hardware *division* and would dominate any inner loop
that runs it per-element. To avoid this we will:
1. Compute the prime **once** per shard-rebalance / index-build.
2. Wrap it in **Lemire fastmod** (`u64 → u32` reduction with one multiply
and one shift) so the per-element cost matches `& mask` to within ~1 ns.
3. Cache the fastmod constants alongside the modulus in the shard / HNSW /
LSH structures.
This is what makes prime moduli cheap enough to use *everywhere*; without
fastmod the proposal would not pencil out.
## Consequences
### Positive
- **Closes ADR-058 finding #6** without the cost of switching the primary
hash function.
- Restores the **2-independence guarantee** of the LSH families used by
sparsifier and mincut attention — these were silently degraded.
- Gives the pi-brain witness chain a **second, cheap-to-add line of defense**
with per-share entropy, addressing a long-standing gap.
- Adds a small, broadly useful **building block** to
`ruvector-collections` that has zero new external dependencies.
- All work is **tier-1 / tier-2** under ADR-026 — no Opus tokens needed for
the bulk of the implementation.
### Negative
- Five integration sites must each be reviewed and benchmarked. The PRD's
staged rollout is mandatory — a big-bang merge would be hard to reason
about.
- Modulo-by-prime is slower than mask if `fastmod` is forgotten. We mitigate
by *requiring* fastmod in the integration patches and gating CI on a
micro-benchmark that catches the regression.
- WASM `u128` is ~5× slower than native; the `u128` mode is therefore
opt-in and will be cfg-gated out of WASM bundles by default.
- The witness-chain change is wire-format-adjacent. We make it a backward
compatible `Option<…>` field; verifiers must accept payloads that lack it.
### Neutral / Followups
- Future work could explore Lucas–Lehmer for explicitly Mersenne-shaped
moduli (e.g. `2^61 - 1`) — a separate ADR if benchmarks warrant.
- A `PrimeModHash<H>` newtype wrapper is the most likely next abstraction;
we'll prototype it in Phase 1 and decide.
## Alternatives Considered
| Option | Why rejected |
|-----------------------------------------------------|--------------------------------------------------------------------|
| Use `num-prime` or `glass_pumpkin` crate | New external dep, allocates, > 100 KB WASM cost |
| Hard-code a static table of "good" primes | Doesn't adapt to runtime resharding; exhausted at 2³² |
| Switch shard hash to BLAKE3 (cryptographic) | 8–10× slower than XXH3; ADR-058 already declined this |
| Probabilistic-only Miller-Rabin everywhere | Unnecessary uncertainty in the hot path; deterministic is free |
| Build a new `ruvector-primes` crate | Adds a 61st workspace crate for ~250 lines of code; not worth it |
| Do nothing | Leaves five known-bad subsystems on the floor |
## Security Considerations
- Miller-Rabin alone is **not** a cryptographic prime generator; we never
claim it as one. The witness-chain use (§4.4 of the PRD) layers it
*alongside* an existing XXH3 fingerprint and a future TEE-backed
signature (ADR-042) — defense in depth, not standalone integrity.
- Per-share ephemeral primes are derived from `SHA256(payload)[0..8]` so
they cannot be precomputed by an attacker who has not seen the payload.
An attacker who *has* seen the payload still needs to forge the original
XXH3 fingerprint as well, which is the existing security baseline.
- The `u128` probabilistic mode is **never** exposed to externally-supplied
numbers in default builds; it is gated behind `--feature unstable-u128`.
## Acceptance Criteria
A reviewer should be able to verify ADR-151 is "Done" when:
1. `cargo test -p ruvector-collections primality` is green and includes
pinned-pseudoprime regressions (e.g. 3215031751, 2152302898747).
2. `cargo test -p ruvector-collections primality::table_cross_check`
re-validates **every entry** of `PRIMES_BELOW_2K` and
`PRIMES_ABOVE_2K` against the Miller-Rabin descent, confirming the
table is consistent with the source-of-truth implementation.
3. `cargo bench -p ruvector-collections primality` reports
`is_prime_u64 ≤ 50 ns`, `prev_prime_below_pow2 ≤ 1 ns` (table fast
path), and `next_prime_u64(arbitrary N) ≤ 2 µs` (general MR path) on
M-series.
4. ruvector-graph shard router under `--feature prime-shard` shows
≥ 30% reduction in shard-load std-dev on the Zipfian micro-bench.
5. micro-hnsw-wasm p99 insert latency at 1 M vectors drops by ≥ 15%.
6. The pi-brain `brain_share` payload tolerates *both* presence and
absence of the new ephemeral-prime field across two release versions.
7. WASM bundle size growth: `micro-hnsw-wasm` ≤ +2 KB, `mcp-brain-server`
≤ +1.5 KB, prime tables ≤ +1 KB total.
---
## Phase 0 Findings (2026-04-16)
Phase 0 (the standalone primality utility in `ruvector-collections`) landed
with all correctness gates green and two of four performance targets met
(a third, `next_prime_u64` on arbitrary input, missed its target by 11%).
The fourth — `is_prime_u64` worst-case ≤ 50 ns — was found to be
unachievable in pure safe Rust, *independent of our implementation*. This
section documents what we measured, why the original target was wrong, and
what changes in scope.
### What landed
- `src/primality_kernel.rs` — shared MR core, `include!`d by both
`build.rs` and `src/primality.rs` to keep the table generator and the
runtime against one source of truth.
- `src/primality.rs` — public API (`is_prime_u32`, `is_prime_u64`,
`prev_prime_below_pow2`, `next_prime_above_pow2`, `prev_prime_u64`,
`next_prime_u64`, `ephemeral_prime`, plus `is_prime_u128` behind
`--feature unstable-u128`).
- `build.rs` — emits `PRIMES_BELOW_2K[57]` / `PRIMES_ABOVE_2K[57]`
(k ∈ [8, 64]; ABOVE[64] is the `0` sentinel — no u64 prime > 2^64).
- `tests/primality_pseudoprimes.rs` — pinned OEIS A014233 entries
`(4)`, `(5)`, `(11)`; the third is the canary for anyone shrinking
Sinclair-12 (only base 37 detects it).
- `tests/table_cross_check.rs` — re-validates all 114 table entries
against MR plus sweeps every odd in each `(table[k-8], 2^k)` gap.
Runtime: ~milliseconds (the *gap* is small — typically ≤ 100 odds).
- `benches/primality.rs` — four criterion benches per PRD §6.
### Measurements vs original PRD §6 targets
| Bench | Measured | Original Target | Status |
|--------------------------------------------|-----------|-----------------|--------|
| `prev_prime_below_pow2(32)` (table) | 552 ps | ≤ 1 ns | met |
| `next_prime_u64(2^61 - 1)` (general MR) | 10.97 µs | ≤ 12 µs | met |
| `next_prime_u64(arbitrary ≈ 1e9)` | 2.23 µs | ≤ 2 µs | +11% |
| `is_prime_u64(u64::MAX - 58)` worst-case | 15.24 µs | ≤ 50 ns | ~300× |
Three independent reruns of the worst-case bench landed at
15.24 / 15.79 / 15.65 µs — stable within ±2%, not measurement noise.
### Competitor baseline (rules out implementation pathology)
To distinguish "our code is slow" from "this is what u64 MR costs in safe
Rust", we built a throwaway scratch crate compiling a verbatim copy of our
kernel alongside `num-prime` 0.4.4. Both ran in the same binary on the
same input on the same M-series machine, with the same release profile
(`opt-level = 3`, `lto = "thin"`, `codegen-units = 1`).
| Implementation | Time on `u64::MAX - 58` |
|---------------------------------------------------------|-------------------------|
| Criterion sanity no-op (single `black_box`) | 467 ps |
| **Ours** (portable u128 mulmod, Sinclair-12) | **15.63 µs** |
| **`num-prime` 0.4.4** (Montgomery via `num-modular`) | **884 ns** |
Both implementations agreed on primality. The 467 ps sanity baseline
confirms criterion is reporting honestly. Conclusions:
1. The 15.63 µs measurement is real, not a tooling artifact.
2. There is a **17.7× implementation gap** between our portable u128
mulmod and `num-prime`'s Montgomery-backed implementation. This is
the single recoverable optimization in pure safe Rust.
3. `num-prime` itself is **17.7× over the original 50 ns target**. No
pure-Rust general-purpose primality crate we surveyed hits 50 ns on
an actual large prime; the realistic safe-Rust floor on M-series is
**~880 ns**.
4. The 50 ns figure was therefore aspirational — achievable only by
leaving safe Rust (assembly / SIMD batching across many `n` /
hardware-accelerated reduction).
### Revised performance targets
PRD §6 is amended in the same PR. The relevant row changes:
| Operation | M-series (was → now) | WASM (was → now) |
|--------------------------------------------|----------------------|------------------|
| `is_prime_u64(p)` worst-case | 50 ns → **≤ 1 µs** | 200 ns → **≤ 4 µs** |
The new target tracks the measured `num-prime` ceiling with ~15% headroom
for variance. All other §6 rows remain unchanged. The current 15.24 µs
implementation does not meet the new target either — Phase 0.1 closes the
gap (see below).
### Phase 0.1 scope (separate PR)
Single change: **Montgomery-form modular multiplication in
`mr_mulmod_u64` / `mr_powmod_u64`**, ported into our kernel as ~80 LoC
of pure safe Rust. Expected speedup 15-18× → lands at the ~880 ns floor.
Validation: criterion bench requires mean ≤ 1.0 µs with `p < 0.01`
vs the Phase 0 baseline. No change to the public API or the table /
cross-check architecture.
### Explicitly rejected from Phase 0.1
- **The 7-witness "Sinclair" set** `{2, 325, 9375, 28178, 450775,
9780504, 1795265022}`. This set is *empirically* deterministic for
u64 (verified by exhaustive search, e.g. miller-rabin.appspot.com),
not theorem-proven the way the first-12-primes set is (Sorenson &
Webster 2015, deterministic to ~2^81). Trading textbook provenance
for a 1.7× speedup is a bad deal when Montgomery alone gives
15-18×. Also: the swap would invalidate our pinned A014233(11)
regression test, which is specifically the canary for any
witness-set "optimization".
- **Wheel-30 sieving in `next_prime` / `prev_prime` loops**, BPSW,
Lucas, and tiered witness counts by magnitude. All sound but not
on the Phase 0.1 critical path. Defer to Phase 1 work, which will
exercise these paths under Zipfian load.
### Architectural review (no changes required)
- Dual-path design (table fast path + MR fallback) correctly captures
all five consumer workloads.
- `tests/table_cross_check.rs` is sufficient as the source-of-truth gate;
the `0.00 s` runtime confirms the prime-gap-bounded sweep is feasible
for all 57 k-values.
- `include!` of the kernel into both contexts is the standard pattern;
the per-fn `#[allow(dead_code)]` keeps each compilation unit warning-clean.
- The `unstable-u128` 40-round probabilistic mode bound is sound:
`Pr[err] ≤ 4⁻⁴⁰ = 2⁻⁸⁰`.
---
## Notes for Reviewers
This ADR's *creative* contribution is not Miller-Rabin itself (textbook,
1976) — it is the observation that **one tiny utility unlocks five
independently identified pathologies** across hashing, sharding, sketching,
adjacency, and witnessing in a workspace that today has no primality
infrastructure at all. The PRD goes deeper on each use-case; this ADR
binds the architectural choices.

View file

@ -0,0 +1,424 @@
# External Review Request — PIAL Phase 0 (Miller-Rabin Primality)
You are an objective reviewer of a freshly-landed Phase-0 PR in a Rust workspace
(`ruvector`). The PR adds a deterministic Miller-Rabin primality utility plus
build-time prime tables. Two of four bench targets are met; one is missed by
~11% and one by ~300×. The team needs an objective plan that:
1. Sanity-checks correctness (we may have blind spots).
2. Proposes ranked optimizations for the missed target — with a *measurement
methodology* for each, not just claims.
3. Identifies any architectural concerns we are missing.
Constraints we cannot relax:
- **Pure Rust, `core`-only.** No external prime/big-integer crates (`num-prime`,
`glass_pumpkin`, etc. were rejected in the binding ADR).
- **Allocation-free, `no_std`-friendly.** Hot paths run in WASM bundles.
- **Sinclair-12 witnesses are non-negotiable** for the deterministic u64 path
unless you can cite a smaller deterministic set proven for `n < 2^64`.
- **Source-of-truth invariant**: build-time tables and runtime tests must be
generated by *the same* MR implementation. Don't propose schemes that fork
the truth source.
---
## 1. Binding context (ADR-151 summary)
Five subsystems in a 60+-crate workspace need prime moduli (shard router, HNSW
adjacency, sparsifier strides, mincut LSH, pi-brain witness chain). Today they
all use `mod 2^k` and have documented pathologies. ADR-151 introduces *one*
shared utility — `crates/ruvector-collections/src/primality.rs` — that all five
will adopt across phases 1–5 (this PR is Phase 0 only: the utility itself).
Design:
- Deterministic MR for `u32` (witnesses {2, 7, 61}) and `u64` (Sinclair-12:
{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}).
- Probabilistic MR for `u128` behind `--feature unstable-u128`, 40 rounds,
error `< 2⁻⁸⁰`.
- **Dual path**: a build-time-generated table `PRIMES_BELOW_2K[57]` /
`PRIMES_ABOVE_2K[57]` covers k ∈ [8, 64] for power-of-two-aligned callers
(~1 ns table load); arbitrary inputs fall through to the general MR descent.
- The table is generated by `build.rs` calling the *same* MR kernel that ships
at runtime (via `include!`). A `tests/table_cross_check.rs` re-validates
every entry under `cargo test`, so MR remains source of truth.
Acceptance gates:
1. `cargo test -p ruvector-collections primality` green, including pinned
pseudoprime regressions (OEIS A014233 entries 4, 5, 11).
2. Table cross-check validates all 114 entries against MR.
3. Bench targets met:
- `is_prime_u64` worst-case ≤ 50 ns (M-series), ≤ 200 ns (WASM)
- `prev_prime_below_pow2(k)` ≤ 1 ns (table)
- `next_prime_u64(arbitrary)` ≤ 2 µs
- `next_prime_u64(2^61)` ≤ 12 µs
---
## 2. The implementation as shipped
### 2.1 Shared kernel — `src/primality_kernel.rs`
`include!`d into both `build.rs` and `src/primality.rs`. Inner attributes are
disallowed in `include!`d files, hence per-fn `#[allow(dead_code)]`.
```rust
// Deterministic Miller-Rabin kernel — ADR-151 (PIAL).
//
// `include!`d into two contexts (build.rs and src/primality.rs) which use
// different subsets of the symbols. Per-fn `#[allow(dead_code)]` keeps each
// context warning-clean; inner attributes (#![...]) aren't legal in
// included files.
// Modular multiplication: returns (a * b) mod m.
// Both operands are widened to u128 before multiplying, so the product of two
// u64 values is formed in full and only then reduced. The remainder is
// strictly less than `m`, so the narrowing cast back to u64 is lossless.
// Panics if `m == 0` (remainder by zero).
#[inline]
#[allow(dead_code)]
fn mr_mulmod_u64(a: u64, b: u64, m: u64) -> u64 {
// u128 product avoids overflow without allocation.
((a as u128).wrapping_mul(b as u128) % (m as u128)) as u64
}
// Modular exponentiation: returns base^exp mod m using binary
// square-and-multiply, consuming `exp` from its least-significant bit.
// Returns 0 when m == 1 (every residue is 0) and 1 when exp == 0 and m > 1.
// Panics if `m == 0` (remainder by zero in `base %= m`).
#[inline]
#[allow(dead_code)]
fn mr_powmod_u64(mut base: u64, mut exp: u64, m: u64) -> u64 {
if m == 1 {
return 0;
}
let mut acc: u64 = 1;
// Reduce once up front so every later operand is already below `m`.
base %= m;
while exp > 0 {
// Current bit set: fold the current power of `base` into the result.
if exp & 1 == 1 {
acc = mr_mulmod_u64(acc, base, m);
}
exp >>= 1;
// Skip the squaring after the last bit; its result would never be used.
if exp > 0 {
base = mr_mulmod_u64(base, base, m);
}
}
acc
}
// Returns true iff `a` is a Miller-Rabin witness of compositeness for `n`.
// Caller guarantees: n is odd, n > 3, and a in [2, n-2]. n - 1 = d * 2^s
// with d odd (passed in pre-decomposed for speed).
#[inline]
#[allow(dead_code)]
fn mr_is_composite_witness(n: u64, d: u64, s: u32, a: u64) -> bool {
// x = a^d mod n.
let mut x = mr_powmod_u64(a, d, n);
// a^d is congruent to +1 or -1 (mod n): `a` gives no evidence against `n`.
if x == 1 || x == n - 1 {
return false;
}
// Square up to s - 1 more times, i.e. inspect a^(d * 2^r) for r = 1..s-1.
// Hitting n - 1 (congruent to -1) at any step again means no evidence.
for _ in 0..s.saturating_sub(1) {
x = mr_mulmod_u64(x, x, n);
if x == n - 1 {
return false;
}
}
// Neither condition held for any r: `a` proves that `n` is composite.
true
}
// Primality test for u64. Returns false for n < 2, then screens `n` by trial
// division against the first twelve primes, then runs one Miller-Rabin round
// per entry of that same list used as the base. The surrounding ADR documents
// this base set as deterministic for every n < 2^64.
#[inline]
#[allow(dead_code)]
fn mr_is_prime_u64(n: u64) -> bool {
if n < 2 {
return false;
}
// Doubles as the trial-division screen and as the Miller-Rabin base list.
const SMALL_PRIMES: [u64; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
for &p in &SMALL_PRIMES {
// `n` is one of the listed primes.
if n == p {
return true;
}
// `n` has a listed prime as a proper divisor.
if n.is_multiple_of(p) {
return false;
}
}
// n is now odd, > 37, and coprime to every Sinclair witness.
// Factor n - 1 as d * 2^s with d odd; shared by every witness round below.
let mut d = n - 1;
let mut s: u32 = 0;
while d & 1 == 0 {
d >>= 1;
s += 1;
}
// A single base proving compositeness is enough to reject `n`.
for &a in &SMALL_PRIMES {
if mr_is_composite_witness(n, d, s, a) {
return false;
}
}
true
}
// u32 entry point: widens `n` losslessly to u64 and reuses the twelve-base
// u64 test.
// NOTE(review): the ADR's determinism table lists the three-base set
// {2, 7, 61} for n < 2^32; this excerpt does not use a u32-specific base set.
#[inline]
#[allow(dead_code)]
fn mr_is_prime_u32(n: u32) -> bool {
mr_is_prime_u64(n as u64)
}
// Returns the largest prime strictly below `upper`, or 0 when there is none
// (upper <= 2).
#[inline]
#[allow(dead_code)]
fn mr_prev_prime_u64(upper: u64) -> u64 {
// No prime lies below 2; 0 is the "none" marker.
if upper <= 2 { return 0; }
// The only prime below 3 is the even prime 2, which the odd-only descent
// below would never visit.
if upper == 3 { return 2; }
// Start at upper - 1, rounded down to an odd candidate.
let mut n = upper - 1;
if n & 1 == 0 { n -= 1; }
// Descend through odd candidates until one passes the primality test.
loop {
if mr_is_prime_u64(n) { return n; }
// Defensive floor so that `n -= 2` cannot underflow.
if n <= 3 { return 2; }
n -= 2;
}
}
// Returns the smallest prime strictly above `lower`, or 0 when
// `lower >= u64::MAX - 58` (named below as the largest u64 prime), i.e. when
// no larger prime fits in a u64.
#[inline]
#[allow(dead_code)]
fn mr_next_prime_u64(lower: u64) -> u64 {
// lower is 0 or 1: the next prime is 2.
if lower < 2 { return 2; }
// lower is exactly 2: the next prime is 3.
if lower < 3 { return 3; }
let largest_u64_prime: u64 = u64::MAX - 58;
if lower >= largest_u64_prime { return 0; }
// Start at lower + 1, rounded up to an odd candidate. The guard above keeps
// `lower` below u64::MAX - 58, so neither addition can overflow here.
let mut n = lower + 1;
if n & 1 == 0 { n += 1; }
// Ascend through odd candidates until one passes the primality test.
// NOTE(review): termination before `n += 2` could overflow relies on
// u64::MAX - 58 actually being prime, as the constant's name asserts.
loop {
if mr_is_prime_u64(n) { return n; }
n += 2;
}
}
```
### 2.2 Public API — relevant excerpts from `src/primality.rs`
```rust
include!("primality_kernel.rs");
include!(concat!(env!("OUT_DIR"), "/prime_tables.rs"));
// ↑ provides: pub const PRIMES_BELOW_2K: [u64; 57]
// pub const PRIMES_ABOVE_2K: [u64; 57] (last entry = 0 sentinel)
/// Reports whether `n` is prime by delegating to the shared kernel's
/// `mr_is_prime_u32`.
#[inline]
pub fn is_prime_u32(n: u32) -> bool { mr_is_prime_u32(n) }
/// Reports whether `n` is prime by delegating to the shared kernel's
/// `mr_is_prime_u64`.
#[inline]
pub fn is_prime_u64(n: u64) -> bool { mr_is_prime_u64(n) }
/// Table fast path: returns the `PRIMES_BELOW_2K` entry for exponent `k`
/// (per the ADR, the largest prime below `2^k`). Slot 0 corresponds to k = 8.
///
/// `k` must lie in `8..=64`; this is enforced only by `debug_assert!`.
/// With debug assertions off, an out-of-range `k` falls through to the array
/// index — presumably a bounds-check panic given the 57-entry table; verify.
#[inline]
pub fn prev_prime_below_pow2(k: u32) -> u64 {
debug_assert!((8..=64).contains(&k));
PRIMES_BELOW_2K[(k - 8) as usize]
}
/// Table fast path: returns the `PRIMES_ABOVE_2K` entry for exponent `k`
/// (per the ADR, the smallest prime above `2^k`). Slot 0 corresponds to k = 8.
///
/// `k` must lie in `8..=63`; this is enforced only by `debug_assert!`.
/// k = 64 is excluded because the last table entry is the `0` sentinel; with
/// debug assertions off, passing k = 64 returns that 0.
#[inline]
pub fn next_prime_above_pow2(k: u32) -> u64 {
debug_assert!((8..=63).contains(&k));
PRIMES_ABOVE_2K[(k - 8) as usize]
}
/// Returns the result of the "previous prime" search for `n`: a table lookup
/// when `n` is an exact power of two covered by `PRIMES_BELOW_2K`, otherwise
/// the general descent in `mr_prev_prime_u64` (which returns 0 when no prime
/// lies below `n`).
#[inline]
pub fn prev_prime_u64(n: u64) -> u64 {
if n.is_power_of_two() {
// For a power of two, the count of trailing zeros is its exponent.
let k = n.trailing_zeros();
// A u64 power of two has exponent at most 63, so the upper bound of 64 is
// never reached on this path; exponents below 8 fall through to the kernel.
if (8..=64).contains(&k) {
return PRIMES_BELOW_2K[(k - 8) as usize];
}
}
mr_prev_prime_u64(n)
}
/// Returns the result of the "next prime" search for `n`: a table lookup when
/// `n` is an exact power of two with exponent in `8..=63`, otherwise the
/// general ascent in `mr_next_prime_u64` (which returns 0 when no larger prime
/// fits in a u64).
#[inline]
pub fn next_prime_u64(n: u64) -> u64 {
if n.is_power_of_two() {
// For a power of two, the count of trailing zeros is its exponent.
let k = n.trailing_zeros();
// Exponents below 8 are not in the table and fall through to the kernel.
if (8..=63).contains(&k) {
return PRIMES_ABOVE_2K[(k - 8) as usize];
}
}
mr_next_prime_u64(n)
}
/// Maps `seed` to a prime. The seed is forced odd and truncated to its low
/// 61 bits; that value is returned if it passes the primality test, otherwise
/// the next prime above it is returned.
///
/// Seeds that differ only in bit 0 or in bits 61..=63 collapse to the same
/// candidate and therefore produce the same result.
#[inline]
pub fn ephemeral_prime(seed: u64) -> u64 {
// Used by pi-brain witness chain (ADR §4.4) for per-share entropy.
// Low 61 bits set: 2^61 - 1.
let mask = (1u64 << 61) - 1;
// Set bit 0 (odd candidate), then drop bits 61 and above.
let s = (seed | 1) & mask;
// s == 1 (seed 0 or 1) is not prime and resolves to 2 via the fallback.
if mr_is_prime_u64(s) { s } else { mr_next_prime_u64(s) }
}
// u128 probabilistic mode (cfg-gated on `unstable-u128`):
#[cfg(feature = "unstable-u128")]
pub fn is_prime_u128(n: u128, rounds: u8) -> bool { /* … 40-round MR with
a tiny seeded LCG for witness selection; deferred to mr_is_prime_u64
when n <= u64::MAX */ }
```
### 2.3 Tests asserting correctness
- `tests/primality_pseudoprimes.rs`:
- `is_prime_u64(3_215_031_751) == false` (OEIS A014233(4), spsp to {2,3,5,7})
- `is_prime_u64(2_152_302_898_747) == false` (A014233(5))
- `is_prime_u64(3_825_123_056_546_413_051) == false` (A014233(11), detected
only by base 37 — canary for anyone shrinking Sinclair-12)
- All primes/composites in [0, 100], 7 Carmichael numbers, edges
(0, 1, u64::MAX, u64::MAX − 58, largest u32 prime).
- `tests/table_cross_check.rs`:
- For each k ∈ [8, 64]: assert `is_prime_u64(PRIMES_BELOW_2K[k-8])` and
sweep every odd integer in `(table[k-8], 2^k)` asserting non-primality.
- Symmetric for k ∈ [8, 63] on `PRIMES_ABOVE_2K`.
- Sentinel: `PRIMES_ABOVE_2K[64-8] == 0`.
---
## 3. Measurements (criterion, M-series, release profile)
### 3.1 Phase-0 benches against the PRD targets
| Bench | Measured | Target | Status |
|--------------------------------------------|-----------|---------|--------|
| `prev_prime_below_pow2(32)` (table) | 552 ps | ≤ 1 ns | green |
| `next_prime_u64(2^61 - 1)` general MR | 10.97 µs | ≤ 12 µs | green |
| `next_prime_u64(arbitrary ≈ 1e9)` general | 2.23 µs | ≤ 2 µs | +11% |
| `is_prime_u64(u64::MAX - 58)` worst-case | **15.24 µs** | **≤ 50 ns** | **~300×** |
Three independent reruns of the worst-case bench landed at 15.24 / 15.79 /
15.65 µs — stable within ±2%, not measurement noise.
### 3.2 Apples-to-apples competitor baseline
To rule out "this machine is slow today" or "criterion is mismeasuring", we
built a throwaway scratch crate (outside the workspace) that compiles a
verbatim copy of our kernel alongside `num-prime` 0.4.4. Both run in the
same binary on the same input, with the same release profile
(`opt-level = 3`, `lto = "thin"`, `codegen-units = 1`).
| Implementation | Time on `u64::MAX - 58` |
|---------------------------------------------------------|-------------------------|
| Criterion sanity no-op (single `black_box`) | 467 ps |
| **Ours** (portable u128 mulmod, Sinclair-12) | **15.63 µs** |
| **`num-prime` 0.4.4** (Montgomery via `num-modular`) | **884 ns** |
| PRD §6 target | 50 ns |
Both implementations agreed on primality (no correctness gap). The 467 ps
sanity baseline confirms criterion is reporting honestly — broken benches
don't produce 467 ps for a no-op.
**What this tells us:**
1. **Our 15.63 µs is real and reproducible**, not a measurement artifact.
2. **We are ~17.7× slower than `num-prime`** on the same input. The
delta is almost certainly Montgomery-form modular multiplication
(`num-prime` pulls `num-modular`, which provides exactly that).
3. **`num-prime` itself is ~17.7× slower than the 50 ns target.** No
pure-Rust general-purpose primality crate we know of hits 50 ns on an
actual large prime; the realistic safe-Rust floor on M-series appears
to be ~880 ns.
4. The PRD's 50 ns figure is therefore *unachievable* in safe Rust — it
would require Montgomery + assembly / SIMD batching across many `n` /
leaving the safe subset entirely.
ADR-151 forbids `num-prime` as a *runtime* dependency, but does not forbid
us from porting Montgomery into our own kernel — `num-modular` is
MIT/Apache and the technique itself is textbook. That is now a *measured*
optimization target with a known ceiling, not a guess.
---
## 4. What we are asking you to do
Produce **one document** with the four sections below. Be specific. Cite
sources where possible. Do not propose changes that would violate the
constraints in the preamble.
### Section A — Correctness audit
Read §2.1 and §2.2. Identify:
1. Any soundness bug (a composite that would be reported prime, or vice
versa) within the documented input ranges.
2. Edge cases not covered by the tests in §2.3 that you would add.
3. Any way the table cross-check could pass while masking a real bug
(i.e. is the test actually load-bearing?).
4. Risks specific to `ephemeral_prime`'s seed → prime mapping when used
for witness-chain fingerprinting (ADR §4.4): collisions, attacker
precomputation, distribution issues.
### Section B — Performance plan, ranked
The sharpened goal, given §3.2's competitor baseline: **close the 17.7×
gap to `num-prime` (15.63 µs → ~880 ns) in pure safe Rust, without taking
`num-prime` or `num-modular` as a runtime dependency**, AND hit the
`next_prime_u64` arbitrary 2 µs target. Treat 50 ns as aspirational; we
expect you to recommend a revised PRD target with justification.
For each proposal:
- **Mechanism**: what changes in code (one paragraph, no hand-waving;
reference §2.1 line ranges where applicable).
- **Expected speedup vs our 15.63 µs baseline**: cite source or give a
back-of-envelope; if the technique is what `num-prime` uses, say so.
- **Cost**: code complexity (LoC, conceptual difficulty for reviewers),
WASM bundle size, any new `unsafe`.
- **Compatibility**: does it preserve the source-of-truth invariant
(build.rs and runtime use the same kernel via `include!`)? Does it
break `no_std`?
- **Validation methodology**: the *exact* benchmark and regression test
you would add to prove the speedup is real and stable, including
the criterion config you would use and the statistical threshold for
declaring "passed".
Rank proposals by `expected_speedup × feasibility / complexity`.
Candidate techniques to consider (extend or reject as you see fit):
- **Montgomery-form modular arithmetic** — likely the single biggest
lever based on the `num-prime` comparison. We want a concrete sketch
of the API change and a LoC estimate for porting it into our kernel.
- Wheel factorization (mod 30 / mod 210) for the small-prime screen.
- Branchless witness loops.
- Reduced witness sets for sub-ranges (e.g. {2} for n < 2047,
{2, 3} for n < 1.4 × 10^9).
- Strong-base early-exit ordering (which witness fails fastest on
random composites?).
- BPSW (Baillie-PSW) instead of MR — different correctness story; we'd
need a citation for deterministic-up-to-2^64 status.
- Strong Lucas as a deterministic add-on.
- Pre-screen by Pollard rho for small-factor composites (does this even
beat trial division for the tiny gap between 37 and our actual call
rate?).
Specifically address:
1. **Realistic safe-Rust floor for `is_prime_u64` worst-case on M-series.**
Our measurement suggests ~880 ns (matching `num-prime`). Confirm or
refute, with reasoning.
2. **Recommended revised PRD target**, given that floor.
3. **The `next_prime_u64(arbitrary)` 2.23 µs vs 2 µs gap** — is this
meaningful or noise-band? If real, what closes it?
### Section C — Architectural review
1. Is the dual-path design (table + MR fallback) correctly capturing the
workload of the five named consumers (shard router, HNSW buckets,
sparsifier, mincut LSH, witness chain)? Any consumer where the table
would mislead?
2. Is `tests/table_cross_check.rs` sufficient as the source-of-truth gate,
or is there a stronger invariant we should assert?
3. Does `include!` of the kernel into both `build.rs` and `src/primality.rs`
create any failure mode you have seen burn other projects?
4. The `unstable-u128` feature uses Russian-peasant `mulmod_u128` and a
tiny seeded LCG for witness selection. Is that sound for the
probabilistic guarantee `Pr[err] < 2⁻⁸⁰` at 40 rounds?
### Section D — Validation methodology
For the *whole* Phase-0 deliverable, propose:
1. The minimum set of CI gates that would catch a regression in either
correctness or performance, and where they should run (PR / nightly /
release).
2. A reproducible benchmark harness that distinguishes signal from noise
on contended hardware (criterion is fine; what statistical thresholds
would you set for "pass"?).
3. A property-test (proptest/quickcheck-style) strategy that would
complement the pinned regressions in §2.3 without re-deriving MR.
4. Anything you would add to the `tests/` or `benches/` directory before
merging Phase 0.
---
## 5. Format of your response
Plain markdown. Sections A/B/C/D headed exactly as above. For Section B,
use a table sorted by your ranking. End with a one-paragraph **Verdict**:
should the PR merge as-is, merge with the PRD §6 row relaxed, or block
on a specific change?
Do not be polite. If a proposal in our implementation is wrong, say so
directly with line-numbered references into §2.1 / §2.2.

View file

@ -0,0 +1,113 @@
# Handoff — Phase 0 Kickoff (PIAL)
You are starting **Phase 0** of PIAL (Prime-Indexed Acceleration Layer):
land the Miller-Rabin primality utility in `ruvector-collections` and
nothing else. Five integration phases follow in separate PRs.
## Read first (in order)
1. **`docs/adr/ADR-151-miller-rabin-prime-optimizations.md`** — the binding
decision (status, scope, acceptance criteria, alternatives rejected).
2. **`docs/research/miller-rabin-optimizations/PRD.md`** — full design,
five creative use-cases, performance targets, six-phase rollout, risks.
3. **This file** — Phase 0 specifics. Do not skip.
## Branch
`feat/miller-rabin-prime-optimizations` (off `main`). Already created.
## Target crate
`crates/ruvector-collections/` already exists in the workspace. Today it
contains `collection.rs`, `error.rs`, `lib.rs`, `manager.rs`. No
`benches/` directory and no `build.rs` yet — both are Phase 0 work.
## Phase 0 Deliverables (four files, one PR)
| File | Purpose | Source of truth |
|---|---|---|
| `src/primality.rs` | Deterministic Miller-Rabin for u32/u64; probabilistic for u128; tabled `prev_prime_below_pow2` / `next_prime_above_pow2` fast paths; general `prev_prime_u64` / `next_prime_u64` MR-descent paths; `ephemeral_prime(seed)` for the witness chain | PRD §5 |
| `build.rs` | Generate `PRIMES_BELOW_2K[57]` and `PRIMES_ABOVE_2K[57]` (k ∈ [8, 64]) from the MR implementation at compile time; emit as `${OUT_DIR}/prime_tables.rs` for `include!`-inclusion in `primality.rs` | ADR-151 "Generation Strategy" |
| `benches/primality.rs` | Criterion benches: `is_prime_u64`, `prev_prime_below_pow2`, `next_prime_u64(arbitrary)`, `next_prime_u64(2^61)`. Targets in PRD §6 | PRD §6 |
| `tests/table_cross_check.rs` | For every k ∈ [8, 64], assert `is_prime_u64(PRIMES_BELOW_2K[k-8])` is true and that no prime exists in `(PRIMES_BELOW_2K[k-8], 2^k)`. Same for `_ABOVE_`. This is the gate that makes MR the source of truth | ADR-151 acceptance #2 |
## Library wiring
Add `pub mod primality;` to `crates/ruvector-collections/src/lib.rs` and
re-export the public API at the crate root. Update the crate-level
doc-comment to mention the new module.
## Dependencies — explicitly do not add
The PRD rejects `num-prime`, `glass_pumpkin`, and any other external
prime/big-integer crates. Use **only** `core` integer arithmetic.
Add `criterion` under `[dev-dependencies]` for benches if it is not
already inherited via the workspace.
## Witnesses (the whole correctness story in three lines)
- `u32`: `{ 2, 7, 61 }` — Pomerance/Selfridge/Wagstaff. Deterministic.
- `u64`: `{ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37 }` — Sinclair (2011). Deterministic.
- `u128`: 40 random rounds, **only** behind `--features unstable-u128`. Probabilistic, error < 2⁻⁸⁰.
## Pinned pseudoprime regressions
Include these in `tests/primality_pseudoprimes.rs` so future witness-set
"optimizations" cannot silently regress correctness:
- `3_215_031_751` — strong pseudoprime to bases {2, 3, 5, 7} (must be detected by Sinclair-12).
- `2_152_302_898_747` — strong pseudoprime to {2, 3, 5, 7, 11}.
- `3_825_123_056_546_413_051` — large 64-bit known-hard composite.
Add small-prime sanity (1, 2, 3, 4, 5, 7, 9, ..., 100) and edge cases
(0, 1, `u64::MAX`, `u64::MAX - 58` which is the largest u64 prime).
## Performance targets (from PRD §6)
| Operation | M-series | WASM |
|---|---|---|
| `is_prime_u64` worst-case | ≤ 50 ns | ≤ 200 ns |
| `prev_prime_below_pow2(k)` (table) | ≤ 1 ns | ≤ 2 ns |
| `next_prime_u64(2^32)` (table) | ≤ 1 ns | ≤ 2 ns |
| `next_prime_u64(arbitrary N)` (general MR) | ≤ 2 µs | ≤ 8 µs |
| `next_prime_u64(2^61)` (general MR) | ≤ 12 µs | ≤ 40 µs |
## Phase 0 is "Done" when
ADR-151 acceptance criteria #1, #2, #3 are all green:
1. `cargo test -p ruvector-collections primality` passes (includes pinned pseudoprimes).
2. `cargo test -p ruvector-collections --test table_cross_check` validates all 114 table entries against MR.
3. `cargo bench -p ruvector-collections primality` meets the targets above on M-series.
**Do not start Phase 1 in this PR.** Phases ship as separate PRs
(PRD §7). Keep this one tightly scoped to the utility itself.
## First commands in the new session
```bash
# Confirm you are on the right branch
git status # should show "On branch feat/miller-rabin-prime-optimizations" with no changes
# Baseline — confirm the crate compiles before you touch it
cargo check -p ruvector-collections
# Re-read the binding documents
cat docs/adr/ADR-151-miller-rabin-prime-optimizations.md | head -80
cat docs/research/miller-rabin-optimizations/PRD.md | sed -n '150,260p' # §5 API + §6 perf
```
Then start with `crates/ruvector-collections/src/primality.rs`. The
deterministic u64 Miller-Rabin is ~80 lines including comments;
everything else (tables via `build.rs`, benches, cross-check test)
follows mechanically from it.
## What is explicitly **not** Phase 0
- Editing `crates/ruvector-graph/` (that's Phase 1).
- Editing any HNSW crate (Phase 2).
- Editing sparsifier or attn-mincut (Phase 3).
- Editing `crates/mcp-brain-server/` or pi-brain payloads (Phase 4).
- Editing CNN / quantization codebooks (Phase 5).
If you find yourself touching any of those, stop and split the PR.

View file

@ -0,0 +1,369 @@
# PRD: Prime-Indexed Acceleration Layer (PIAL)
> Creative Miller-Rabin-driven optimizations for ruvector's hashing,
> sharding, sketching, and witness-chain layers.
| Field | Value |
|--------------------|------------------------------------------------------|
| **Status** | Draft |
| **Date** | 2026-04-16 |
| **Owner** | RuVector Core / Architecture |
| **Related ADR** | ADR-151 (this PRD's binding decision record) |
| **Cross-refs** | ADR-027 (HNSW), ADR-038 (witness), ADR-058 (hash), |
| | ADR-148/149 (brain perf), ADR-150 (π-brain) |
| **Tier (ADR-026)** | T1 (Agent Booster eligible) for the core utility; |
| | T2 (Haiku) for the integration patches. |
---
## 1. Background
Three years of incremental work have left ruvector with several places where
**arithmetic on indices, hashes, and shard keys defaults to power-of-two
moduli** — convenient on hardware (`x & (N - 1)`), pathological on real data:
| Site | Current modulus | Failure mode |
|---------------------------------------------------|--------------------|------------------------------------------------------------|
| `ruvector-graph` shard router (ADR-058 #6) | `xxh3_64() mod 2^k`| ~50% collision @ 2³² nodes; biased on Zipfian keys |
| `micro-hnsw-wasm` adjacency map | open-addressed 2^k | clustering on near-duplicate vectors (e.g. timestamps) |
| `ruvector-sparsifier` stride sampler | power-of-2 stride | aliasing on lattice / image-grid graphs |
| `ruvector-attn-mincut` LSH sketch | ad-hoc constant | breaks 2-independence of universal hash family |
| pi-brain witness fingerprint (ADR-038) | XXH3 only | single-hash tamper risk; no per-share entropy |
The fix in every one of these is **the same primitive**: a fast, deterministic
primality test that lets us mint a prime *near a target size* on demand.
We choose **Miller-Rabin** because it is:
- **Deterministic** for all `u64` inputs with the Sinclair witness set
`{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` — no probabilistic guarantees
needed for our hot paths.
- **O(k · log³ n)** — a `next_prime(2^32)` call costs ~2 µs in benchmarks;
amortized to zero against shard-rebalance cycles.
- **WASM-friendly** — pure integer arithmetic, no FFI, fits in <1 KB compiled.
- **Tier-1 eligible** under ADR-026 — pure transform work, no LLM in the loop.
This PRD frames a single, surgically scoped utility (`primality.rs`) that
*unblocks* a portfolio of creative optimizations across the workspace. The
binding architectural commitments live in ADR-151.
---
## 2. Goals
| # | Goal | Metric / Acceptance |
|---|----------------------------------------------------------------------|------------------------------------------------------|
| G1| Provide `is_prime`, `next_prime`, `prev_prime` over `u32`/`u64` | Deterministic, ≥ 200 M ops/s on M-series |
| G2| Re-shard ruvector-graph by prime modulus | ≥ 30% reduction in shard-load std-dev on Zipfian load|
| G3| Convert HNSW adjacency tables to prime-bucket open addressing | ≥ 15% drop in p99 insert latency at 1 M vectors |
| G4| Replace LSH stride/modulus constants with certified primes | Restore 2-independence; pass property tests |
| G5| Add per-share ephemeral prime fingerprint to π-brain witness chain | +8 bytes/share; published in `brain_share` payload |
| G6| Cross-target: the utility compiles for native, WASM, and `no_std` | Single crate, no feature-flag explosion |
## 3. Non-Goals
- **No cryptographic key generation.** Miller-Rabin alone is *not* a substitute
for proven-prime generation in RSA/ECC; we only use it for hashing/sharding.
- **No new heap allocations** in the inner loop — the utility must be
allocation-free past the (constant-size) witness array.
- **No replacement** of `prime-radiant` (which is a coherence-gate crate and
unrelated despite the name collision).
- **No big-integer support.** 64-bit (and an opt-in `u128` mode) is enough for
every ruvector use case identified above.
- **No SHAKE/HMAC redesign.** ADR-058's other findings stand independently.
---
## 4. Creative Use-Cases (the "why this is interesting")
### 4.1 Prime-Modulus Shard Routing — *direct fix for ADR-058 #6*
Today's shard router is `xxh3_64(node_id) & (shards - 1)`. The mask discards
all but `log₂(shards)` bits of entropy, which is exactly when adversarial /
Zipfian inputs cluster. Replacing it with `xxh3_64(node_id) % p`, where
`p = prev_prime(shards)`, recovers full entropy and gives provably balanced
buckets under universal hashing.
> **Creative twist:** because `prev_prime(k)` is cheap, we can *adapt* the
> modulus during a rolling re-shard (every N minutes) — the cluster never
> sees a power-of-two pathology because the modulus literally never *is* a
> power of two for two consecutive epochs.
### 4.2 Prime-Bucket HNSW Adjacency
`micro-hnsw-wasm` and `ruvector-hyperbolic-hnsw` store edges in open-addressed
tables sized to the next power of two. Probe-sequence collisions on
near-duplicate vectors (e.g. real-time sensor or timestamp embeddings) blow up
p99 insert latency. Switching to `prev_prime(2^k)` capacity with linear or
quadratic probing keeps the table size cache-friendly while breaking the
power-of-two clustering.
### 4.3 Certified Modulus for Universal LSH
Several sketch modules (`ruvector-attn-mincut`, sparsifier samplers) build
hash families of the form `((a · x + b) mod p) mod m`. The 2-independence
guarantee *requires* `p` to be prime and `> universe_size`. Today these are
hand-picked Mersenne-shaped constants (`2^61 - 1`, `2^31 - 1`); when the
universe grows past those bounds the family silently degrades. Miller-Rabin
lets us call `next_prime(universe_size)` on dataset load and store the chosen
modulus alongside the index.
### 4.4 Witness-Chain Ephemeral Primes (π-brain)
The pi-brain witness chain (ADR-038, CLAUDE.md "Witness Chain Rules")
currently fingerprints each shared memory with XXH3 only. We propose:
```text
share = { payload, fingerprint_xxh3, ephemeral_prime q, fingerprint_modq }
where q = next_prime( seed = SHA256(payload)[0..8] )
```
A tampering peer attempting to substitute payloads must collide *both*
fingerprints — including a hash modulo a prime `q` they cannot precompute,
because `q` is derived per-share. Cost: 8 bytes on the wire, ~2 µs at the
sender, ~50 ns at every verifier. The asymmetry is the point.
### 4.5 Anti-Aliasing Stride for Sparsifier Sampling
Spectral sparsifiers in `ruvector-sparsifier` use stride-based subsampling
when the importance sketch is too expensive. Power-of-two strides alias
brutally on grid-structured graphs (image, mesh, lattice). A prime stride
breaks the alignment for the same reason linear-congruential generators
demand prime moduli — borrowed wisdom, decades old, free to reuse.
### 4.6 Bonus: Prime-Sized Quantization Codebooks
Product-quantization codebooks (used by ruvector-cnn-wasm and ruQu) sized to
prime cardinalities show measurably better recall@k on standard benchmarks
than power-of-two codebooks because they break the implicit "code-of-codes"
correlation across sub-spaces. This is an opt-in mode, not a default.
---
## 5. Proposed Architecture
```
┌──────────────────────────────────────────────────────────────┐
│ crates/ruvector-collections/src/primality.rs (new, ~250 LoC) │
│ │
│ pub fn is_prime_u32(n: u32) -> bool // {2,7,61} │
│ pub fn is_prime_u64(n: u64) -> bool // Sinclair-12 │
│ pub fn is_prime_u128(n: u128, k: u8) -> bool // probabilistic│
│ pub fn next_prime_u64(n: u64) -> u64 │
│ pub fn prev_prime_u64(n: u64) -> u64 │
│ pub fn ephemeral_prime(seed: u64) -> u64 // for §4.4 │
│ │
│ #[cfg(target_arch = "wasm32")] // shares same impl │
└──────────────────┬───────────────────────────┬────────────────┘
│ │
┌──────────┴──────────┐ ┌─────────┴───────────┐
▼ ▼ ▼ ▼
shard router HNSW buckets LSH families witness chain
(ruvector-graph) (micro-hnsw) (sparsifier, (mcp-brain-server,
attn-mincut) pi-brain)
```
### Why `ruvector-collections`?
- It already houses cross-cutting data-structure utilities.
- All five consumers depend on it transitively, so no new edges in the
dependency graph.
- Keeps the workspace top-level crate count flat (we have 60+ already).
### Public API (sketch)
```rust
//! crates/ruvector-collections/src/primality.rs
//!
//! Deterministic Miller-Rabin primality for u32/u64 and probabilistic
//! Miller-Rabin for u128. Allocation-free, no_std-friendly.
//!
//! Hot-path strategy: tabled primes for the common power-of-two-aligned
//! sizes (zero runtime cost), Miller-Rabin descent as the general fallback.
#[inline]
pub const fn is_prime_u32(n: u32) -> bool { /* witnesses: 2, 7, 61 */ }
#[inline]
pub const fn is_prime_u64(n: u64) -> bool {
// Sinclair (2011): deterministic for all u64
// witnesses: 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37
}
pub fn is_prime_u128(n: u128, rounds: u8) -> bool { /* probabilistic */ }
// ── Generation: dual-path ────────────────────────────────────────────
//
// Fast path: lookup table for "largest prime < 2^k", k ∈ [8, 64].
// CI validates every entry against the Miller-Rabin descent at build
// time, so the table is never the source of truth — MR is.
const PRIMES_BELOW_2K: [u64; 57] = [
251, // < 2^8
509, // < 2^9
1021, // < 2^10
// ... entries for k = 11..=31 ...
4_294_967_291, // < 2^32 (shard-router common case)
// ... entries for k = 33..=63 ...
18_446_744_073_709_551_557, // < 2^64
];
#[inline]
pub const fn prev_prime_below_pow2(k: u32) -> u64 {
debug_assert!(k >= 8 && k <= 64);
PRIMES_BELOW_2K[(k - 8) as usize]
}
#[inline]
pub fn prev_prime_u64(n: u64) -> u64 {
// Fast path: power-of-two-aligned inputs (HNSW buckets, shard sizes)
if n.is_power_of_two() && n.trailing_zeros() >= 8 {
return prev_prime_below_pow2(n.trailing_zeros());
}
// General path: 6k±1 wheel + Miller-Rabin descent
miller_rabin_descent(n, Direction::Down)
}
#[inline]
pub fn next_prime_u64(n: u64) -> u64 {
if n.is_power_of_two() && n.trailing_zeros() >= 8 {
// Symmetric optional fast path: PRIMES_ABOVE_2K table
return next_prime_above_pow2(n.trailing_zeros());
}
miller_rabin_descent(n, Direction::Up)
}
pub fn ephemeral_prime(seed: u64) -> u64 {
// seed → next_prime((seed | 1) % 2^61) — used by witness chain (§4.4)
// No table — input is unpredictable by design.
}
```
### Why the dual-path matters
Three of PIAL's five generation sites (shard router, HNSW bucket sizing,
sparsifier strides) ask for primes near *fixed* sizes that never change
between releases. The table converts those calls into a single L1-cached
load — no Miller-Rabin work at runtime at all.
The two unpredictable sites (LSH universe, witness-chain ephemeral primes)
fall through to the general MR path. They're cold paths anyway —
microsecond-scale generation cost is invisible against the surrounding work.
**Crucially, MR is still the source of truth.** A `build.rs` script
regenerates `PRIMES_BELOW_2K` and `PRIMES_ABOVE_2K` from the MR
implementation on every build, and a `#[test]` cross-checks every entry
under `cargo test`. The table is an *amortization*, not a substitute.
| Generation site | Path taken | Runtime cost |
|-----------------------------|--------------------|--------------|
| Shard router (`prev_prime(2^k)`) | Fast (table) | ~1 ns |
| HNSW bucket (`prev_prime(2^k)`) | Fast (table) | ~1 ns |
| Sparsifier stride (table-friendly)| Fast (table) | ~1 ns |
| LSH modulus (`next_prime(N)`) | General (MR) | ~250 ns |
| Witness ephemeral (`next_prime(seed)`)| General (MR) | ~250 ns |
---
## 6. Performance Targets
> **Revised 2026-04-16 (Phase 0).** The original `is_prime_u64` worst-case
> target of 50 ns was found to be unachievable in pure safe Rust;
> `num-prime` itself measures ~880 ns on the same hardware. Target relaxed
> to track the empirical safe-Rust ceiling. See §6.1 and the Phase 0
> Findings section of ADR-151 for the full justification.
| Operation | Target (M-series) | Target (WASM) |
|------------------------------------------------|---------------------|--------------------|
| `is_prime_u64(p)` (worst-case) | **≤ 1 µs** *(was 50 ns)* | **≤ 4 µs** *(was 200 ns)* |
| `prev_prime_below_pow2(k)` (table fast path) | **≤ 1 ns** | **≤ 2 ns** |
| `next_prime_u64(2^32)` (table fast path) | **≤ 1 ns** | **≤ 2 ns** |
| `next_prime_u64(arbitrary N)` (general MR path)| ≤ 2 µs | ≤ 8 µs |
| `next_prime_u64(2^61)` (general MR path) | ≤ 12 µs | ≤ 40 µs |
| Shard re-route on 1 M nodes | ≤ 30 ms (one-shot) | n/a |
| HNSW p99 insert @ 1 M vectors | -15% vs baseline | -10% vs baseline |
| WASM bundle growth from `PRIMES_BELOW_2K`+`_ABOVE_2K` | n/a | ≤ 1 KB total |
Benchmarks live in `crates/ruvector-collections/benches/primality.rs` and run
under existing `npm run bench` infrastructure.
### 6.1 Empirical findings (Phase 0)
Phase 0 measurements on M-series, criterion release profile:
| Bench | Measured | Revised target | Status |
|--------------------------------------------|-----------|----------------|--------|
| `prev_prime_below_pow2(32)` | 552 ps | ≤ 1 ns | met |
| `next_prime_u64(2^61 - 1)` | 10.97 µs | ≤ 12 µs | met |
| `next_prime_u64(arbitrary ≈ 1e9)` | 2.23 µs | ≤ 2 µs | +11% |
| `is_prime_u64(u64::MAX - 58)` worst-case | 15.24 µs | ≤ 1 µs | does not meet revised target — Phase 0.1 |
A throwaway scratch crate compiling a verbatim copy of our kernel
alongside `num-prime` 0.4.4 in the same binary on the same input
measured **ours = 15.63 µs, num-prime = 884 ns** (criterion sanity no-op
= 467 ps confirms harness honesty). The 17.7× gap is recoverable in pure
safe Rust by porting Montgomery-form modular multiplication into
`mr_mulmod_u64` / `mr_powmod_u64` (~80 LoC). That is Phase 0.1 scope and
ships in a separate PR; see ADR-151 "Phase 0 Findings" for the full plan
and the explicit rejection of the empirical 7-witness "Sinclair" set as
a correctness regression dressed as a perf win.
---
## 7. Rollout Plan
| Phase | Scope | Gate |
|-------|-------------------------------------------------------------------------|--------------------------------------------|
| **0** | Land `primality.rs` + tests + benches in `ruvector-collections` | `npm test && npm run lint` green |
| **1** | Wire `next_prime` into ruvector-graph shard router behind feature flag | A/B Zipfian load; ≥ 30% std-dev reduction |
| **2** | Convert HNSW adjacency to prime buckets (micro-hnsw-wasm first) | recall@k unchanged; p99 insert -15% |
| **3** | Switch sparsifier + attn-mincut LSH families to certified primes | property tests pass; no regression in cuts |
| **4** | Ship ephemeral-prime fingerprint in pi-brain witness payload (opt-in) | `brain_share` accepts new field; verifiers |
| | | tolerant of absence (backward compatible) |
| **5** | Optional: prime-sized PQ codebooks in ruvector-cnn-wasm | recall@10 ≥ baseline on SIFT-1M |
Each phase is a separate PR; no big-bang merge.
---
## 8. Risks & Mitigations
| Risk | Mitigation |
|-----------------------------------------------------------------|----------------------------------------------------------------|
| Modulo-by-prime is a *division*, slower than mask | Use Lemire's `fastmod` (one mul + one shift) — already in tree |
| Sinclair witness set has subtle bugs in edge cases (n < 9) | Hard-code small-prime fast path + 100% branch coverage tests |
| WASM `u128` codegen is ~5× slower than native | u128 mode is opt-in; default paths are u64 |
| Cluster mid-flight reshard exposes intermediate state | Phase 1 ships behind `--features prime-shard`; rollout is gated |
| Witness-chain change breaks older pi-brain peers | New field is `Option<…>`; verifiers ignore-on-absent |
| "Yet another collections crate" sprawl | All work lives in *existing* `ruvector-collections` |
---
## 9. Open Questions
1. Should `next_prime_u64` accept a *budget* (max-distance) and return
`Option<u64>` instead of looping unbounded? (Probably yes.)
2. Do we want a `PrimeModHash<H>` newtype wrapper that auto-applies fastmod,
or expose `prev_prime` and let callers compose? (Lean: wrapper.)
3. Does the witness-chain ephemeral prime need to be authenticated under the
sender's key, or is per-share derivation from `SHA256(payload)` enough?
(Defer to security review during Phase 4.)
---
## 10. Out of Scope (deliberately)
- Big-integer / arbitrary-precision Miller-Rabin (use `num-bigint` if ever
needed — not on the roadmap).
- Replacing XXH3 as ruvector's primary hash (ADR-058's job).
- Strong-pseudoprime-based Lucas certificates (yagni for hashing).
- Distributed prime-generation protocols (we mint locally, deterministically).
---
## 11. Approval Checklist
- [ ] Architecture review (links ADR-151)
- [ ] Security review (esp. §4.4 witness chain)
- [ ] Performance baseline captured for shard-router and HNSW p99
- [ ] WASM size budget verified (`micro-hnsw-wasm` < +2 KB)
- [ ] Documentation: README in `ruvector-collections` references new module