feat(collections): PIAL Phase 0 — Miller-Rabin primality kernel + prime tables (#358)

feat(collections): PIAL Phase 0 — Miller-Rabin primality kernel + prime tables
2026-05-30 03:53:34 +00:00 · 2026-04-20 14:28:43 -04:00 · 2026-04-20 14:28:43 -04:00 · 855d8faec4
commit 855d8faec4
parent c01361cba5 241738c986
13 changed files with 2098 additions and 1 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -9124,6 +9124,7 @@ version = "2.2.0"
 dependencies = [
 "bincode 2.0.1",
 "chrono",
+ "criterion 0.5.1",
 "dashmap 6.1.0",
 "parking_lot 0.12.5",
 "ruvector-core 2.2.0",
--- a/crates/ruvector-collections/Cargo.toml
+++ b/crates/ruvector-collections/Cargo.toml
@ -7,6 +7,13 @@ authors.workspace = true
 repository.workspace = true
 readme = "README.md"
 description = "High-performance collection management for Ruvector vector databases"
+build = "build.rs"
+
+[features]
+default = []
+# Opt-in probabilistic Miller-Rabin for u128 (PRD §5, ADR-151).
+# WASM u128 codegen is ~5× slower than native; gate keeps it out of default bundles.
+unstable-u128 = []

 [dependencies]
 ruvector-core = { version = "2.0.2", path = "../ruvector-core" }
@ -20,3 +27,9 @@ bincode = { workspace = true }
 chrono = { workspace = true }

 [dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+
+[[bench]]
+name = "primality"
+harness = false
+
--- a/crates/ruvector-collections/benches/primality.rs
+++ b/crates/ruvector-collections/benches/primality.rs
@ -0,0 +1,57 @@
+//! Phase-0 benches for ADR-151 / PIAL.
+//!
+//! Targets (M-series):
+//!
+//! | bench                                    | target |
+//! |------------------------------------------|--------|
+//! | `is_prime_u64` (worst case)              | ≤ 50 ns |
+//! | `prev_prime_below_pow2` (table fast path)| ≤ 1 ns  |
+//! | `next_prime_u64` (arbitrary)             | ≤ 2 µs  |
+//! | `next_prime_u64` (2^61)                  | ≤ 12 µs |
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use ruvector_collections::primality::{
+    is_prime_u64, next_prime_u64, prev_prime_below_pow2,
+};
+
+fn bench_is_prime_u64_worst_case(c: &mut Criterion) {
+    // The Sinclair witness loop runs to completion only on actual primes,
+    // so use the largest u64 prime as worst-case input.
+    let n = u64::MAX - 58;
+    c.bench_function("is_prime_u64/worst_case_largest_u64_prime", |b| {
+        b.iter(|| is_prime_u64(black_box(n)))
+    });
+}
+
+fn bench_prev_prime_below_pow2_table(c: &mut Criterion) {
+    c.bench_function("prev_prime_below_pow2/k=32_shard_router", |b| {
+        b.iter(|| prev_prime_below_pow2(black_box(32)))
+    });
+}
+
+fn bench_next_prime_u64_arbitrary(c: &mut Criterion) {
+    // Pick a value off the power-of-two grid so the fast path is missed
+    // and the general MR descent is exercised.
+    let n: u64 = 1_000_003_777;
+    c.bench_function("next_prime_u64/arbitrary_~1e9", |b| {
+        b.iter(|| next_prime_u64(black_box(n)))
+    });
+}
+
+fn bench_next_prime_u64_2_pow_61(c: &mut Criterion) {
+    // 2^61 hits the table fast path via the power-of-two check; subtract 1
+    // to force the general MR descent against a worst-case-shaped input.
+    let n: u64 = (1u64 << 61) - 1;
+    c.bench_function("next_prime_u64/2^61_minus_1_general_path", |b| {
+        b.iter(|| next_prime_u64(black_box(n)))
+    });
+}
+
+criterion_group!(
+    primality_benches,
+    bench_is_prime_u64_worst_case,
+    bench_prev_prime_below_pow2_table,
+    bench_next_prime_u64_arbitrary,
+    bench_next_prime_u64_2_pow_61
+);
+criterion_main!(primality_benches);
--- a/crates/ruvector-collections/build.rs
+++ b/crates/ruvector-collections/build.rs
@ -0,0 +1,73 @@
+// build.rs — emits PRIMES_BELOW_2K[57] and PRIMES_ABOVE_2K[57] using the
+// same Miller-Rabin kernel that ships at runtime. ADR-151 acceptance #2
+// requires the table and the runtime to agree on every entry, and this is
+// how we guarantee that — one source of truth, included from both sides.
+
+use std::env;
+use std::fs;
+use std::path::PathBuf;
+
+include!("src/primality_kernel.rs");
+
+fn main() {
+    println!("cargo:rerun-if-changed=src/primality_kernel.rs");
+    println!("cargo:rerun-if-changed=build.rs");
+
+    let mut out = String::with_capacity(4096);
+    out.push_str(
+        "// AUTO-GENERATED by build.rs from primality_kernel.rs.\n\
+         // Do not edit by hand — regenerated on every build.\n\
+         //\n\
+         // Index: table[k - 8] holds the prime for exponent k, k in [8, 64].\n\n",
+    );
+
+    // BELOW: largest prime strictly less than 2^k.
+    out.push_str(
+        "/// Largest prime strictly less than 2^k for k in [8, 64], indexed by `k - 8`.\n\
+         ///\n\
+         /// Generated at build time from the same Miller-Rabin kernel that ships at runtime\n\
+         /// (ADR-151 acceptance #2). Re-validated under `cargo test`.\n",
+    );
+    out.push_str("pub const PRIMES_BELOW_2K: [u64; 57] = [\n");
+    for k in 8u32..=64 {
+        let p = if k == 64 {
+            // 2^64 overflows u64. Largest prime < 2^64 is the largest u64
+            // prime; u64::MAX itself is composite, so prev_prime(u64::MAX)
+            // gives the right answer.
+            mr_prev_prime_u64(u64::MAX)
+        } else {
+            mr_prev_prime_u64(1u64 << k)
+        };
+        out.push_str(&format!("    {p}, // largest prime < 2^{k}\n"));
+    }
+    out.push_str("];\n\n");
+
+    // ABOVE: smallest prime strictly greater than 2^k.
+    out.push_str(
+        "/// Smallest prime strictly greater than 2^k for k in [8, 64], indexed by `k - 8`.\n\
+         ///\n\
+         /// Entry at k = 64 is `0` (sentinel) — no u64 prime exists greater than 2^64.\n\
+         /// Runtime callers must avoid that index.\n",
+    );
+    out.push_str("pub const PRIMES_ABOVE_2K: [u64; 57] = [\n");
+    for k in 8u32..=64 {
+        let p = if k == 64 {
+            // No u64 prime exists strictly greater than 2^64. Emit a sentinel
+            // and forbid this index at the runtime call site (debug_assert
+            // in next_prime_above_pow2).
+            0u64
+        } else {
+            mr_next_prime_u64(1u64 << k)
+        };
+        if p == 0 {
+            out.push_str(&format!("    0, // sentinel: no u64 prime > 2^{k}\n"));
+        } else {
+            out.push_str(&format!("    {p}, // smallest prime > 2^{k}\n"));
+        }
+    }
+    out.push_str("];\n");
+
+    let out_dir = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR not set"));
+    let out_path = out_dir.join("prime_tables.rs");
+    fs::write(&out_path, out).expect("failed to write prime_tables.rs");
+}
--- a/crates/ruvector-collections/src/lib.rs
+++ b/crates/ruvector-collections/src/lib.rs
@ -1,6 +1,7 @@
 //! # Ruvector Collections
 //!
-//! Multi-collection management with aliases for organizing vector databases.
+//! Multi-collection management with aliases for organizing vector databases,
+//! plus the workspace's shared primality utility (ADR-151 / PIAL).
 //!
 //! ## Features
 //!
@ -9,6 +10,9 @@
 //! - **Collection Statistics**: Track collection metrics
 //! - **Thread-safe**: Concurrent access using DashMap
 //! - **Persistence**: Store collections on disk
+//! - **Primality**: Deterministic Miller-Rabin + tabled fast paths for prime
+//!   moduli used by ruvector-graph, micro-hnsw-wasm, sparsifier, attn-mincut,
+//!   and pi-brain (see [`primality`])
 //!
 //! ## Example
 //!
@ -47,6 +51,7 @@
 pub mod collection;
 pub mod error;
 pub mod manager;
+pub mod primality;

 pub use collection::{Collection, CollectionConfig, CollectionStats};
 pub use error::{CollectionError, Result};
--- a/crates/ruvector-collections/src/primality.rs
+++ b/crates/ruvector-collections/src/primality.rs
@ -0,0 +1,316 @@
+//! Deterministic Miller-Rabin primality plus tabled fast paths for the
+//! power-of-two-aligned cases that dominate ruvector's hot paths.
+//!
+//! Designed for ADR-151 (PIAL — Prime-Indexed Acceleration Layer). Five
+//! consumers (shard router, HNSW buckets, sparsifier strides, mincut LSH,
+//! pi-brain witness chain) get one shared utility and zero new external
+//! dependencies.
+//!
+//! # Determinism
+//!
+//! | Range | Witnesses | Result |
+//! |-------|-----------|--------|
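+//! | `n < 2^32` | same twelve bases via the shared `u64` path (`{2, 7, 61}`, Jaeschke 1993, would already suffice) | Deterministic |
+//! | `n < 2^64` | `{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` (first twelve primes, the crate's "Sinclair" set; sufficient below 2^64 per OEIS A014233) | Deterministic |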
+//! | `n < 2^128` | 40 random rounds (`unstable-u128` feature) | `Pr[err] < 2⁻⁸⁰` |
+//!
+//! Pinned-pseudoprime regressions in `tests/primality_pseudoprimes.rs`
+//! protect the deterministic ranges from witness-set "optimizations".
+//!
+//! # Hot vs cold paths
+//!
+//! Three of PIAL's five sites request primes near *fixed* power-of-two
+//! sizes. Those calls hit [`prev_prime_below_pow2`] / [`next_prime_above_pow2`]
+//! — a single L1-cached load, ~1 ns. The two unpredictable sites (LSH
+//! universe, witness ephemeral primes) use the general MR descent at
+//! ~250 ns. Both of those sites are cold paths.
+//!
+//! Crucially, `build.rs` generates the tables from the same
+//! `primality_kernel.rs` that backs [`is_prime_u64`], so MR remains the source of truth.
+
+// Pull in the deterministic Miller-Rabin kernel that build.rs also uses.
+// Same code, same answers — that's the whole point.
+include!("primality_kernel.rs");
+
+// Pull in the build-time-generated tables (PRIMES_BELOW_2K, PRIMES_ABOVE_2K).
+include!(concat!(env!("OUT_DIR"), "/prime_tables.rs"));
+
+/// Returns `true` iff `n` is prime. Deterministic for all `u32`.
+///
+/// Widens `n` and delegates to the shared `u64` kernel, so the twelve-base
+/// witness set is used here too (`{2, 7, 61}` alone would suffice for `u32`).
+#[inline]
+pub fn is_prime_u32(n: u32) -> bool {
+    mr_is_prime_u32(n)
+}
+
+/// Returns `true` iff `n` is prime. Deterministic for all `u64`.
+///
+/// Uses Sinclair's 2011 witness set
+/// `{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` — known to be sufficient
+/// for the entire `u64` range. Allocation-free.
+#[inline]
+pub fn is_prime_u64(n: u64) -> bool {
+    mr_is_prime_u64(n)
+}
+
+/// Largest prime strictly less than `2^k`, for `k ∈ [8, 64]`.
+///
+/// Single L1-cached table load (~1 ns). Use this whenever the caller knows
+/// the size is a power of two — shard routers, HNSW bucket sizing,
+/// sparsifier strides.
+///
+/// # Panics (debug only)
+///
+/// Debug-asserts `8 <= k <= 64`.
+#[inline]
+pub fn prev_prime_below_pow2(k: u32) -> u64 {
+    debug_assert!((8..=64).contains(&k), "k out of table range [8, 64]");
+    PRIMES_BELOW_2K[(k - 8) as usize]
+}
+
+/// Smallest prime strictly greater than `2^k`, for `k ∈ [8, 63]`.
+///
+/// Symmetric companion to [`prev_prime_below_pow2`]. The `k = 64` entry of
+/// the underlying table is a sentinel (no `u64` prime exists greater than
+/// `2^64`); callers must not request it.
+///
+/// # Panics (debug only)
+///
+/// Debug-asserts `8 <= k <= 63`. Release builds skip the check, so `k = 64` yields the sentinel `0`.
+#[inline]
+pub fn next_prime_above_pow2(k: u32) -> u64 {
+    debug_assert!(
+        (8..=63).contains(&k),
+        "k out of table range [8, 63]; PRIMES_ABOVE_2K[64] is a sentinel"
+    );
+    PRIMES_ABOVE_2K[(k - 8) as usize]
+}
+
+/// Largest prime strictly less than `n`. Returns `0` if no such `u64` prime
+/// exists (i.e. `n <= 2`).
+///
+/// Routes power-of-two inputs (`n = 2^k`, `k ∈ [8, 63]`; `2^64` does not fit
+/// in a `u64`) to the table; everything else falls through to a Miller-Rabin descent.
+#[inline]
+pub fn prev_prime_u64(n: u64) -> u64 {
+    if n.is_power_of_two() {
+        let k = n.trailing_zeros();
+        if (8..=64).contains(&k) {
+            return PRIMES_BELOW_2K[(k - 8) as usize];
+        }
+    }
+    mr_prev_prime_u64(n)
+}
+
+/// Smallest prime strictly greater than `n`. Returns `0` if `n` is at or
+/// above the largest `u64` prime (`u64::MAX - 58`).
+///
+/// Routes power-of-two-aligned inputs (`n = 2^k`, `k ∈ [8, 63]`) to the
+/// table; everything else falls through to a Miller-Rabin descent.
+#[inline]
+pub fn next_prime_u64(n: u64) -> u64 {
+    if n.is_power_of_two() {
+        let k = n.trailing_zeros();
+        if (8..=63).contains(&k) {
+            return PRIMES_ABOVE_2K[(k - 8) as usize];
+        }
+    }
+    mr_next_prime_u64(n)
+}
+
+/// Derives a deterministic ephemeral prime from `seed`, suitable for the
+/// pi-brain witness chain (ADR-151 §4.4).
+///
+/// Maps the seed into the odd lower-2⁶¹ window then walks up to the next
+/// prime. The 2⁶¹ ceiling keeps results well inside `u64` even after the
+/// MR walk and lets downstream consumers store the value in a single
+/// 64-bit field with room to spare.
+#[inline]
+pub fn ephemeral_prime(seed: u64) -> u64 {
+    let mask = (1u64 << 61) - 1;
+    let s = (seed | 1) & mask;
+    if mr_is_prime_u64(s) {
+        s
+    } else {
+        // Bounded: the prime gap below 2^61 is far smaller than the
+        // remaining headroom to u64::MAX, so this never returns 0.
+        mr_next_prime_u64(s)
+    }
+}
+
+// ── Probabilistic u128 mode (opt-in) ─────────────────────────────────────
+
+/// Probabilistic Miller-Rabin for `u128`. Soundness error `< 4^-rounds`;
+/// `rounds = 40` gives `< 2⁻⁸⁰`, adequate for hashing but **not** a
+/// cryptographic prime generator (see ADR-151 "Security Considerations").
+///
+/// Gated behind the `unstable-u128` feature: WASM `u128` codegen is ~5×
+/// slower than native and we keep it out of default bundles.
+#[cfg(feature = "unstable-u128")]
+pub fn is_prime_u128(n: u128, rounds: u8) -> bool {
+    if n < 2 {
+        return false;
+    }
+    // Cheap divisibility screen — also catches every n that fits in u64
+    // and is one of the Sinclair witnesses.
+    const SMALL_PRIMES: [u128; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
+    for &p in &SMALL_PRIMES {
+        if n == p {
+            return true;
+        }
+        if n.is_multiple_of(p) {
+            return false;
+        }
+    }
+    // If n fits in u64, defer to the deterministic path.
+    if n <= u64::MAX as u128 {
+        return mr_is_prime_u64(n as u64);
+    }
+
+    // n > u64::MAX, n odd, coprime to first 12 primes. Decompose n - 1.
+    let nm1 = n - 1;
+    let s = nm1.trailing_zeros();
+    let d = nm1 >> s;
+
+    // Tiny inline LCG seeded from n so the test is reproducible across runs.
+    // Knuth MMIX multiplier/increment; we only need uniformity, not crypto.
+    let mut state: u128 = n ^ 0x9E37_79B9_7F4A_7C15_F39C_C060_5CED_C835u128;
+    for _ in 0..rounds {
+        state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
+        // Witness in [2, n-2].
+        let a = 2u128 + (state % (n - 3));
+        if mr_is_composite_u128(n, d, s, a) {
+            return false;
+        }
+    }
+    true
+}
+
+#[cfg(feature = "unstable-u128")]
+#[inline]
+fn mr_is_composite_u128(n: u128, d: u128, s: u32, a: u128) -> bool {
+    let mut x = powmod_u128(a, d, n);
+    if x == 1 || x == n - 1 {
+        return false;
+    }
+    for _ in 0..s.saturating_sub(1) {
+        x = mulmod_u128(x, x, n);
+        if x == n - 1 {
+            return false;
+        }
+    }
+    true
+}
+
+#[cfg(feature = "unstable-u128")]
+#[inline]
+fn powmod_u128(mut base: u128, mut exp: u128, m: u128) -> u128 {
+    if m == 1 {
+        return 0;
+    }
+    let mut acc: u128 = 1 % m;
+    base %= m;
+    while exp > 0 {
+        if exp & 1 == 1 {
+            acc = mulmod_u128(acc, base, m);
+        }
+        exp >>= 1;
+        if exp > 0 {
+            base = mulmod_u128(base, base, m);
+        }
+    }
+    acc
+}
+
+// Russian-peasant mulmod for u128 — works for any m < 2^128 without a u256.
+#[cfg(feature = "unstable-u128")]
+#[inline]
+fn mulmod_u128(mut a: u128, mut b: u128, m: u128) -> u128 {
+    let mut acc: u128 = 0;
+    a %= m;
+    while b > 0 {
+        if b & 1 == 1 {
+            acc = mod_add_u128(acc, a, m);
+        }
+        a = mod_add_u128(a, a, m);
+        b >>= 1;
+    }
+    acc
+}
+
+#[cfg(feature = "unstable-u128")]
+#[inline]
+fn mod_add_u128(a: u128, b: u128, m: u128) -> u128 {
+    // Pre: a < m, b < m, m may be > 2^127. Computed (a + b) mod m without
+    // a u256 by detecting wrapping overflow.
+    let sum = a.wrapping_add(b);
+    if sum < a || sum >= m {
+        sum.wrapping_sub(m)
+    } else {
+        sum
+    }
+}
+
+// ── Internal sanity tests (run with the rest of the crate's unit tests) ──
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn small_primes_under_100() {
+        let known: [u64; 25] = [
+            2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79,
+            83, 89, 97,
+        ];
+        for n in 0u64..100 {
+            assert_eq!(is_prime_u64(n), known.contains(&n), "is_prime_u64({n})");
+        }
+    }
+
+    #[test]
+    fn edges() {
+        assert!(!is_prime_u64(0));
+        assert!(!is_prime_u64(1));
+        assert!(!is_prime_u64(u64::MAX));
+        assert!(is_prime_u64(u64::MAX - 58), "largest u64 prime");
+    }
+
+    #[test]
+    fn table_index_round_trip() {
+        // The most heavily-used shard-router entry.
+        assert_eq!(prev_prime_below_pow2(32), 4_294_967_291);
+        // Smallest table entry.
+        assert_eq!(prev_prime_below_pow2(8), 251);
+        // Largest table entry.
+        assert_eq!(prev_prime_below_pow2(64), u64::MAX - 58);
+    }
+
+    #[cfg(feature = "unstable-u128")]
+    #[test]
+    fn u128_probabilistic_smoke() {
+        use super::is_prime_u128;
+        // Defers to deterministic u64 path for n <= u64::MAX.
+        assert!(is_prime_u128(7, 40));
+        assert!(!is_prime_u128(9, 40));
+        assert!(is_prime_u128(u64::MAX as u128 - 58, 40));
+        // True u128 path: 2^89 - 1 is a Mersenne prime.
+        let m89: u128 = (1u128 << 89) - 1;
+        assert!(is_prime_u128(m89, 40), "M_89 = 2^89 - 1 is prime");
+        // Composite above 2^64; rejected by the small-prime screen before any witness round.
+        let composite: u128 = (1u128 << 65) + 1; // = 3 * 11 * 131 * ... (divisible by 3)
+        assert!(!is_prime_u128(composite, 40));
+    }
+
+    #[test]
+    fn ephemeral_prime_is_prime_for_assorted_seeds() {
+        for seed in [0u64, 1, 42, 0xDEAD_BEEF, u64::MAX, 1_000_003] {
+            let p = ephemeral_prime(seed);
+            assert!(is_prime_u64(p), "ephemeral_prime({seed}) = {p} not prime");
+            // Loose upper bound: largest known prime gap below 2^64 is well under 2^31,
+            // so anything below 2^62 means the walk stayed near its 2^61 starting window.
+            assert!(p < (1u64 << 62), "ephemeral_prime overshot expected window");
+        }
+    }
+}
--- a/crates/ruvector-collections/src/primality_kernel.rs
+++ b/crates/ruvector-collections/src/primality_kernel.rs
@ -0,0 +1,162 @@
+// Deterministic Miller-Rabin kernel — ADR-151 (PIAL).
+//
+// `include!`d into two contexts (build.rs and src/primality.rs) which use
+// different subsets of the symbols. Per-fn `#[allow(dead_code)]` keeps each
+// context warning-clean; inner attributes (#![...]) aren't legal in
+// included files.
+//
+// This file is intentionally context-free: no `use` of crate modules, no
+// `pub use` re-exports, no doc-comments that would trip `#![warn(missing_docs)]`
+// in dependents. It is `include!`d from BOTH `src/primality.rs` AND `build.rs`
+// so the table generator and the runtime share one source of truth.
+//
+// Witness sets:
+//   u32: widened and run through the u64 set below ({2, 7, 61} alone would suffice)
+//   u64: {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}  Sinclair (2011)
+//
+// Both are deterministic over their full ranges. Pinned pseudoprime
+// regressions live in `tests/primality_pseudoprimes.rs`.
+
+#[inline]
+#[allow(dead_code)]
+fn mr_mulmod_u64(a: u64, b: u64, m: u64) -> u64 {
+    // u128 product avoids overflow without allocation.
+    ((a as u128).wrapping_mul(b as u128) % (m as u128)) as u64
+}
+
+#[inline]
+#[allow(dead_code)]
+fn mr_powmod_u64(mut base: u64, mut exp: u64, m: u64) -> u64 {
+    if m == 1 {
+        return 0;
+    }
+    let mut acc: u64 = 1;
+    base %= m;
+    while exp > 0 {
+        if exp & 1 == 1 {
+            acc = mr_mulmod_u64(acc, base, m);
+        }
+        exp >>= 1;
+        if exp > 0 {
+            base = mr_mulmod_u64(base, base, m);
+        }
+    }
+    acc
+}
+
+// Returns true iff `a` is a Miller-Rabin witness of compositeness for `n`.
+// Caller guarantees: n is odd, n > 3, and a in [2, n-2]. n - 1 = d * 2^s
+// with d odd (passed in pre-decomposed for speed).
+#[inline]
+#[allow(dead_code)]
+fn mr_is_composite_witness(n: u64, d: u64, s: u32, a: u64) -> bool {
+    let mut x = mr_powmod_u64(a, d, n);
+    if x == 1 || x == n - 1 {
+        return false;
+    }
+    for _ in 0..s.saturating_sub(1) {
+        x = mr_mulmod_u64(x, x, n);
+        if x == n - 1 {
+            return false;
+        }
+    }
+    true
+}
+
+#[inline]
+#[allow(dead_code)]
+fn mr_is_prime_u64(n: u64) -> bool {
+    // Small-n fast path covers all of the ill-defined / edge cases the
+    // Sinclair set assumes away (n < 9, even n, n ≤ largest witness).
+    if n < 2 {
+        return false;
+    }
+    // Cheap divisibility screen by the first few primes.
+    const SMALL_PRIMES: [u64; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
+    for &p in &SMALL_PRIMES {
+        if n == p {
+            return true;
+        }
+        if n.is_multiple_of(p) {
+            return false;
+        }
+    }
+    // n is now odd, > 37, and coprime to every Sinclair witness — so
+    // every witness is a valid base in [2, n-2].
+    let mut d = n - 1;
+    let mut s: u32 = 0;
+    while d & 1 == 0 {
+        d >>= 1;
+        s += 1;
+    }
+    for &a in &SMALL_PRIMES {
+        if mr_is_composite_witness(n, d, s, a) {
+            return false;
+        }
+    }
+    true
+}
+
+#[inline]
+#[allow(dead_code)]
+fn mr_is_prime_u32(n: u32) -> bool {
+    // Witnesses {2, 7, 61} are sufficient for all u32; reuse the u64
+    // implementation which already screens small primes.
+    mr_is_prime_u64(n as u64)
+}
+
+// Find the largest prime strictly less than `upper`. Returns 0 if none
+// exists in u64 (i.e. upper <= 2). Used by build.rs and the general
+// `prev_prime_u64` runtime path.
+#[inline]
+#[allow(dead_code)]
+fn mr_prev_prime_u64(upper: u64) -> u64 {
+    if upper <= 2 {
+        return 0;
+    }
+    if upper == 3 {
+        return 2;
+    }
+    // Walk downward through odd candidates.
+    let mut n = upper - 1;
+    if n & 1 == 0 {
+        n -= 1;
+    }
+    loop {
+        if mr_is_prime_u64(n) {
+            return n;
+        }
+        if n <= 3 {
+            return 2;
+        }
+        n -= 2;
+    }
+}
+
+// Find the smallest prime strictly greater than `lower`. Returns 0 if
+// `lower` >= largest u64 prime (u64::MAX - 58).
+#[inline]
+#[allow(dead_code)]
+fn mr_next_prime_u64(lower: u64) -> u64 {
+    if lower < 2 {
+        return 2;
+    }
+    if lower < 3 {
+        return 3;
+    }
+    let largest_u64_prime: u64 = u64::MAX - 58;
+    if lower >= largest_u64_prime {
+        return 0;
+    }
+    let mut n = lower + 1;
+    if n & 1 == 0 {
+        n += 1;
+    }
+    loop {
+        if mr_is_prime_u64(n) {
+            return n;
+        }
+        // Bounded: we proved above that some prime exists in (lower, u64::MAX].
+        n += 2;
+    }
+}
--- a/crates/ruvector-collections/tests/primality_pseudoprimes.rs
+++ b/crates/ruvector-collections/tests/primality_pseudoprimes.rs
@ -0,0 +1,84 @@
+//! Pinned pseudoprime regressions for the deterministic Miller-Rabin path.
+//!
+//! These exist so any future "optimization" that shrinks the Sinclair-12
+//! witness set fails CI immediately. Numbers come from OEIS A014233
+//! (smallest strong pseudoprimes to the first n primes).
+
+use ruvector_collections::primality::{is_prime_u32, is_prime_u64};
+
+/// OEIS A014233(4): smallest spsp to bases {2, 3, 5, 7}. Detected by base 11.
+const SPP_2357: u64 = 3_215_031_751;
+
+/// OEIS A014233(5): smallest spsp to bases {2, 3, 5, 7, 11}. Detected by base 13.
+const SPP_235711: u64 = 2_152_302_898_747;
+
+/// OEIS A014233(11): smallest spsp to first 11 primes (through 31).
+/// Detected ONLY by the 12th Sinclair witness, base 37 — the canary that
+/// catches anyone shrinking the witness set.
+const SPP_FIRST_11: u64 = 3_825_123_056_546_413_051;
+
+#[test]
+fn detects_strong_pseudoprime_2357() {
+    assert!(!is_prime_u64(SPP_2357), "{SPP_2357} is composite (detected by base 11)");
+}
+
+#[test]
+fn detects_strong_pseudoprime_235711() {
+    assert!(!is_prime_u64(SPP_235711), "{SPP_235711} is composite (detected by base 13)");
+}
+
+#[test]
+fn detects_strong_pseudoprime_first_11_primes() {
+    assert!(
+        !is_prime_u64(SPP_FIRST_11),
+        "{SPP_FIRST_11} is composite — detection requires base 37 (Sinclair's last witness)"
+    );
+}
+
+#[test]
+fn small_prime_sanity_under_100() {
+    let primes_under_100: [u64; 25] = [
+        2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
+        89, 97,
+    ];
+    for n in 0u64..=100 {
+        let expected = primes_under_100.contains(&n);
+        assert_eq!(is_prime_u64(n), expected, "is_prime_u64({n})");
+    }
+}
+
+#[test]
+fn edge_cases() {
+    assert!(!is_prime_u64(0));
+    assert!(!is_prime_u64(1));
+    assert!(!is_prime_u64(u64::MAX), "u64::MAX (= 2^64 - 1) factors");
+    assert!(is_prime_u64(u64::MAX - 58), "largest u64 prime: u64::MAX - 58");
+    // Largest u32 prime is 2^32 - 5 = 4_294_967_291.
+    assert!(is_prime_u32(4_294_967_291), "largest u32 prime");
+    assert!(!is_prime_u32(u32::MAX));
+}
+
+#[test]
+fn assorted_known_primes() {
+    // Mersenne and other well-known primes inside u64.
+    for &p in &[
+        7u64,
+        127,
+        8191,
+        131_071,
+        524_287,
+        2_147_483_647, // 2^31 - 1
+        2_305_843_009_213_693_951u64, // 2^61 - 1
+    ] {
+        assert!(is_prime_u64(p), "{p} is a known prime");
+    }
+}
+
+#[test]
+fn assorted_known_composites() {
+    // Carmichael numbers (Fermat-pseudoprimes) — not strong-pseudoprimes,
+    // but worth pinning since textbook Fermat tests fail on them.
+    for &n in &[561u64, 1105, 1729, 2465, 2821, 6601, 8911] {
+        assert!(!is_prime_u64(n), "{n} is a Carmichael number, composite");
+    }
+}
--- a/crates/ruvector-collections/tests/table_cross_check.rs
+++ b/crates/ruvector-collections/tests/table_cross_check.rs
@ -0,0 +1,99 @@
+//! Acceptance criterion #2 of ADR-151: every entry of `PRIMES_BELOW_2K` and
+//! `PRIMES_ABOVE_2K` must agree with the runtime Miller-Rabin descent.
+//!
+//! For each `k ∈ [8, 64]` (BELOW) / `[8, 63]` (ABOVE) we re-run MR on the
+//! tabled prime, then sweep every odd integer in the gap to `2^k` and
+//! assert no other prime hides there. This is what makes MR — not the
+//! table — the source of truth.
+
+use ruvector_collections::primality::{
+    is_prime_u64, PRIMES_ABOVE_2K, PRIMES_BELOW_2K,
+};
+
+/// Iterate odd candidates strictly between `lo` (exclusive) and `hi`
+/// (exclusive), without overflowing `u64`. Used to confirm the prime gap
+/// reported by the table contains nothing else prime.
+fn sweep_odds_strictly_between<F: FnMut(u64)>(lo: u64, hi: u64, mut f: F) {
+    let mut n = match lo.checked_add(1) {
+        Some(n) => n,
+        None => return,
+    };
+    if n & 1 == 0 {
+        n = match n.checked_add(1) {
+            Some(n) => n,
+            None => return,
+        };
+    }
+    while n < hi {
+        f(n);
+        n = match n.checked_add(2) {
+            Some(n) => n,
+            None => return,
+        };
+    }
+}
+
+#[test]
+fn primality_below_table_cross_check() {
+    for k in 8u32..=64 {
+        let p = PRIMES_BELOW_2K[(k - 8) as usize];
+        assert!(
+            is_prime_u64(p),
+            "PRIMES_BELOW_2K[k={k}] = {p} not prime per Miller-Rabin"
+        );
+
+        // hi = 2^k, but 2^64 doesn't fit in u64. For k = 64 the gap to scan
+        // is therefore (p, u64::MAX], handled in its own branch below, which
+        // then skips the generic sweep via `continue`.
+        let hi = if k == 64 {
+            // Sweep (p, u64::MAX]: use u64::MAX as the exclusive bound here,
+            // then check u64::MAX itself separately below.
+            sweep_odds_strictly_between(p, u64::MAX, |m| {
+                assert!(
+                    !is_prime_u64(m),
+                    "found prime {m} > PRIMES_BELOW_2K[64] = {p} (within u64)"
+                );
+            });
+            // u64::MAX itself is divisible by 3, so it is trivially composite,
+            // but assert anyway.
+            assert!(!is_prime_u64(u64::MAX), "u64::MAX is composite");
+            continue;
+        } else {
+            1u64 << k
+        };
+
+        sweep_odds_strictly_between(p, hi, |m| {
+            assert!(
+                !is_prime_u64(m),
+                "found prime {m} in (PRIMES_BELOW_2K[k={k}] = {p}, 2^{k} = {hi})"
+            );
+        });
+    }
+}
+
+#[test]
+fn primality_above_table_cross_check() {
+    // k = 64 entry is a sentinel (no u64 prime > 2^64) — skip it.
+    for k in 8u32..=63 {
+        let p = PRIMES_ABOVE_2K[(k - 8) as usize];
+        assert!(
+            is_prime_u64(p),
+            "PRIMES_ABOVE_2K[k={k}] = {p} not prime per Miller-Rabin"
+        );
+        let lo = 1u64 << k;
+        sweep_odds_strictly_between(lo, p, |m| {
+            assert!(
+                !is_prime_u64(m),
+                "found prime {m} in (2^{k} = {lo}, PRIMES_ABOVE_2K[k={k}] = {p})"
+            );
+        });
+    }
+
+    // Sentinel check: the k=64 slot must remain 0 (any non-zero value
+    // would imply a u64 prime > 2^64, which is impossible).
+    assert_eq!(
+        PRIMES_ABOVE_2K[(64 - 8) as usize],
+        0,
+        "PRIMES_ABOVE_2K[64] must be the sentinel 0 — there is no u64 prime > 2^64"
+    );
+}
--- a/docs/adr/ADR-151-miller-rabin-prime-optimizations.md
+++ b/docs/adr/ADR-151-miller-rabin-prime-optimizations.md
@ -0,0 +1,381 @@
+# ADR-151: Miller-Rabin–Driven Prime Optimizations (PIAL)
+
+## Status
+
+Accepted (Phase 0 landed 2026-04-16; performance targets revised — see "Phase 0 Findings" below)
+
+## Date
+
+2026-04-16
+
+## Authors
+
+ruv.io · RuVector Architecture
+
+## Relates To
+
+- **PRD**: `docs/research/miller-rabin-optimizations/PRD.md`
+- ADR-027 — HNSW parameterized query fix
+- ADR-038 — npx-ruvector / RVLite witness integration
+- ADR-058 — RVF hash security & optimization (finding #6)
+- ADR-148 — Brain hypothesis engine
+- ADR-149 — Brain performance optimizations
+- ADR-150 — π-brain + RuvLtra via Tailscale
+
+## Tier (per ADR-026)
+
+- **Core utility**: Tier-1 (Agent Booster eligible — pure WASM transform)
+- **Integration patches**: Tier-2 (Haiku-cost simple edits)
+
+---
+
+## Context
+
+Five independent subsystems in ruvector default to **power-of-two moduli** for
+hashing, sharding, sketching, and adjacency storage. Each has a documented or
+empirically observed pathology:
+
+1. **ruvector-graph shard router** (ADR-058 finding #6, P3): `xxh3_64() mod
+   2^k` produces ~50% birthday collisions at 2³² nodes and biases under
+   Zipfian keys.
+2. **micro-hnsw-wasm / hyperbolic-hnsw adjacency**: open-addressed tables
+   sized to `2^k` cluster on near-duplicate vectors (timestamps, sensor
+   streams), inflating p99 insert latency.
+3. **ruvector-sparsifier stride sampler**: power-of-two strides alias on
+   grid-structured graphs (images, meshes, lattices) — well-known LCG-era
+   problem with a well-known fix.
+4. **ruvector-attn-mincut LSH families**: `((a·x+b) mod p) mod m` requires
+   `p` to be prime and `> universe`; today's hand-picked Mersenne constants
+   silently degrade past their bounds.
+5. **pi-brain witness chain** (ADR-038): single-hash (XXH3) tamper-evidence
+   with no per-share entropy.
+
+A grep across all crates confirms **zero existing primality-testing code** in
+ruvector. The `prime-radiant` crate's name is metaphorical (coherence-gate)
+and unrelated. There is no infrastructure to build on, but the surface area
+is small enough that a single utility module unlocks all five consumers.
+
+We need a primality test that is:
+
+- **Deterministic** for `u64` (the size used by every consumer above).
+- **Allocation-free** (hot paths in `no_std` and WASM contexts).
+- **Constant-time-ish** for cryptographic-flavored use (witness chain).
+- **Cheap enough** to call mid-resharding without operator coordination.
+
+**Miller-Rabin** with the first twelve primes as witnesses,
+`{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` (called "Sinclair-12" in this
+ADR and the PRD, though Sinclair's 2011 set is the separate 7-base set
+discussed under Phase 0.1), satisfies all of these for `u64`. For `u32`,
+the Jaeschke (1993) set `{2, 7, 61}` is sufficient. For `u128` (an opt-in
+mode for future BFV-flavored work), probabilistic Miller-Rabin with
+`k = 40` rounds gives a soundness error of `≤ 4^-40 = 2^-80` — adequate for
+hashing and far below cryptographic thresholds.
+
+## Decision
+
+We will introduce a single new module — `crates/ruvector-collections/src/primality.rs` —
+exposing a deterministic Miller-Rabin primality test plus `next_prime` /
+`prev_prime` helpers, and we will wire it into five consumer subsystems
+**incrementally, behind feature flags**, in the order described in the PRD's
+Rollout Plan.
+
+We deliberately reject every alternative that fragments the workspace
+further (new crate, external dependency on `glass_pumpkin` / `num-prime`,
+or duplicating logic across `micro-hnsw-wasm` and `ruvector-graph`).
+
+### Architecture Summary
+
+```
+┌──────────────────────────────────────────────────────────────┐
+│  ruvector-collections::primality   (NEW, ~250 LoC, no_std)    │
+│                                                                │
+│   is_prime_u32 / is_prime_u64 / is_prime_u128                │
+│   next_prime_u64 / prev_prime_u64                            │
+│   ephemeral_prime(seed)            ← π-brain witness only    │
+└────────┬──────────────┬──────────────┬──────────────┬─────────┘
+         ▼              ▼              ▼              ▼
+   shard router    HNSW buckets    LSH families   witness chain
+   (P1)            (P2)            (P3, P4, P5)   (P6, opt-in)
+```
+
+### What We Already Have
+
+| Component                           | Location                                    | Status        |
+|-------------------------------------|---------------------------------------------|---------------|
+| Workspace utility crate             | `crates/ruvector-collections`               | Established   |
+| Lemire `fastmod`                    | already vendored in tree                    | Reusable      |
+| HNSW adjacency abstraction          | `crates/micro-hnsw-wasm`                    | Existing      |
+| Shard router using XXH3-64          | `crates/ruvector-graph/src/distributed/`    | ADR-058 #6    |
+| Pi-brain witness payload            | `crates/mcp-brain-server`                   | XXH3 only     |
+| Sparsifier samplers                 | `crates/ruvector-sparsifier/src/sampler.rs` | Power-of-2    |
+| LSH sketch (mincut attention)       | `crates/ruvector-attn-mincut`               | Hand-picked p |
+
+### What We Will Build
+
+| Item                                                    | Owner        | Phase |
+|---------------------------------------------------------|--------------|-------|
+| `primality.rs` + benches + property tests               | core         | 0     |
+| `PRIMES_BELOW_2K` / `PRIMES_ABOVE_2K` tables + `build.rs` regen + CI cross-check vs MR | core | 0 |
+| Shard-router `--feature prime-shard` switch (uses table fast path) | distributed | 1 |
+| HNSW prime-bucket capacity strategy (uses table fast path) | hnsw       | 2     |
+| Certified-prime LSH modulus (`p = next_prime(universe)`, general MR path) | sketches | 3 |
+| Witness-chain `Option<EphemeralPrimeFingerprint>` field (general MR path) | brain | 4 |
+| Optional: prime-cardinality PQ codebooks                | cnn / quant  | 5     |
+
+### Generation Strategy: Table Fast Path + Miller-Rabin Fallback
+
+Three of the five integration sites (shard router, HNSW buckets,
+sparsifier strides) request primes near **fixed power-of-two sizes**
+that never change between releases. For these we ship a static table
+of "largest prime < 2^k" for k ∈ [8, 64] (~456 bytes, ~1 KB combined
+with the symmetric `_ABOVE_` table) and route those calls to a single
+L1-cached load — **zero Miller-Rabin work at runtime**.
+
+The two unpredictable sites (LSH universe, witness ephemeral primes)
+fall through to the general Miller-Rabin descent path at ~250 ns per
+call. Both are cold paths (index-build time and per-share, respectively).
+
+Crucially, **Miller-Rabin remains the source of truth.** The tables are
+generated by a `build.rs` script that calls the MR implementation, and
+a `#[test]` re-validates every entry under `cargo test`. The table is
+an *amortization* of MR to compile time, not a replacement for it.
+
+This refinement keeps the proposal's runtime cost honest: PIAL adds
+≤ 1 ns to the hottest paths (shard routing, HNSW probe sequences) and
+~250 ns to the coldest paths (one-shot index build, per-share fingerprint).
+
+### Determinism Guarantees
+
+| Range        | Witnesses                                         | Result          |
+|--------------|---------------------------------------------------|-----------------|
+| `n < 2^32`   | 2, 7, 61                                          | Deterministic   |
+| `n < 2^64`   | 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37        | Deterministic   |
+| `n < 2^128`  | 40 random rounds                                  | Pr[err] < 2⁻⁸⁰  |
+
+Tests will pin every documented "hard" pseudoprime (e.g. 3215031751,
+2152302898747) so the deterministic guarantee is regression-protected.
+
+### Hot-Path Avoidance
+
+Modulo-by-prime is a hardware *division* and would dominate any inner loop
+that runs it per-element. To avoid this we will:
+
+1. Compute the prime **once** per shard-rebalance / index-build.
+2. Wrap it in **Lemire fastmod** (`u64 → u32` reduction with one multiply
+   and one shift) so the per-element cost matches `& mask` to within ~1 ns.
+3. Cache the fastmod constants alongside the modulus in the shard / HNSW /
+   LSH structures.
+
+This is what makes prime moduli cheap enough to use *everywhere*; without
+fastmod the proposal would not pencil out.
+
+## Consequences
+
+### Positive
+
+- **Closes ADR-058 finding #6** without the cost of switching the primary
+  hash function.
+- Restores the **2-independence guarantee** of the LSH families used by
+  sparsifier and mincut attention — these were silently degraded.
+- Gives the pi-brain witness chain a **second, cheap-to-add line of defense**
+  with per-share entropy, addressing a long-standing gap.
+- Adds a small, broadly useful **building block** to
+  `ruvector-collections` that has zero new external dependencies.
+- All work is **tier-1 / tier-2** under ADR-026 — no Opus tokens needed for
+  the bulk of the implementation.
+
+### Negative
+
+- Five integration sites must each be reviewed and benchmarked. The PRD's
+  staged rollout is mandatory — a big-bang merge would be hard to reason
+  about.
+- Modulo-by-prime is slower than mask if `fastmod` is forgotten. We mitigate
+  by *requiring* fastmod in the integration patches and gating CI on a
+  micro-benchmark that catches the regression.
+- WASM `u128` is ~5× slower than native; the `u128` mode is therefore
+  opt-in and will be cfg-gated out of WASM bundles by default.
+- The witness-chain change is wire-format-adjacent. We make it a backward
+  compatible `Option<…>` field; verifiers must accept payloads that lack it.
+
+### Neutral / Followups
+
+- Future work could explore Lucas–Lehmer for explicitly Mersenne-shaped
+  moduli (e.g. `2^61 − 1`) — a separate ADR if benchmarks warrant.
+- A `PrimeModHash<H>` newtype wrapper is the most likely next abstraction;
+  we'll prototype it in Phase 1 and decide.
+
+## Alternatives Considered
+
+| Option                                              | Why rejected                                                       |
+|-----------------------------------------------------|--------------------------------------------------------------------|
+| Use `num-prime` or `glass_pumpkin` crate            | New external dep, allocates, > 100 KB WASM cost                    |
+| Hard-code a static table of "good" primes           | Doesn't adapt to runtime resharding; exhausted at 2³²              |
+| Switch shard hash to BLAKE3 (cryptographic)         | 8–10× slower than XXH3; ADR-058 already declined this              |
+| Probabilistic-only Miller-Rabin everywhere          | Unnecessary uncertainty in the hot path; deterministic is free     |
+| Build a new `ruvector-primes` crate                 | Adds a 61st workspace crate for ~250 lines of code; not worth it   |
+| Do nothing                                          | Leaves five known-bad subsystems on the floor                      |
+
+## Security Considerations
+
+- Miller-Rabin alone is **not** a cryptographic prime generator; we never
+  claim it as one. The witness-chain use (§4.4 of the PRD) layers it
+  *alongside* an existing XXH3 fingerprint and a future TEE-backed
+  signature (ADR-042) — defense in depth, not standalone integrity.
+- Per-share ephemeral primes are derived from `SHA256(payload)[0..8]` so
+  they cannot be precomputed by an attacker who has not seen the payload.
+  An attacker who *has* seen the payload still needs to forge the original
+  XXH3 fingerprint as well, which is the existing security baseline.
+- The `u128` probabilistic mode is **never** exposed to externally-supplied
+  numbers in default builds; it is gated behind `--feature unstable-u128`.
+
+## Acceptance Criteria
+
+A reviewer should be able to verify ADR-151 is "Done" when:
+
+1. `cargo test -p ruvector-collections primality` is green and includes
+   pinned-pseudoprime regressions (e.g. 3215031751, 2152302898747).
+2. `cargo test -p ruvector-collections --test table_cross_check`
+   re-validates **every entry** of `PRIMES_BELOW_2K` and
+   `PRIMES_ABOVE_2K` against the Miller-Rabin descent, confirming the
+   table is consistent with the source-of-truth implementation.
+3. `cargo bench -p ruvector-collections primality` reports
+   `is_prime_u64 ≤ 50 ns`, `prev_prime_below_pow2 ≤ 1 ns` (table fast
+   path), and `next_prime_u64(arbitrary N) ≤ 2 µs` (general MR path) on
+   M-series.
+4. ruvector-graph shard router under `--feature prime-shard` shows
+   ≥ 30% reduction in shard-load std-dev on the Zipfian micro-bench.
+5. micro-hnsw-wasm p99 insert latency at 1 M vectors drops by ≥ 15%.
+6. The pi-brain `brain_share` payload tolerates *both* presence and
+   absence of the new ephemeral-prime field across two release versions.
+7. WASM bundle size growth: `micro-hnsw-wasm` ≤ +2 KB, `mcp-brain-server`
+   ≤ +1.5 KB, prime tables ≤ +1 KB total.
+
+---
+
+## Phase 0 Findings (2026-04-16)
+
+Phase 0 (the standalone primality utility in `ruvector-collections`) landed
+with all correctness gates green and two of four performance targets met.
+Of the two misses, `next_prime_u64(arbitrary)` came in 11% over its ≤ 2 µs
+target (2.23 µs), and `is_prime_u64` worst-case ≤ 50 ns was found to be
+unachievable in pure safe Rust, *independent of our implementation*. This
+section documents what we measured, why the original 50 ns target was
+wrong, and what changes in scope.
+
+### What landed
+
+- `src/primality_kernel.rs` — shared MR core, `include!`d by both
+  `build.rs` and `src/primality.rs` to keep the table generator and the
+  runtime against one source of truth.
+- `src/primality.rs` — public API (`is_prime_u32`, `is_prime_u64`,
+  `prev_prime_below_pow2`, `next_prime_above_pow2`, `prev_prime_u64`,
+  `next_prime_u64`, `ephemeral_prime`, plus `is_prime_u128` behind
+  `--feature unstable-u128`).
+- `build.rs` — emits `PRIMES_BELOW_2K[57]` / `PRIMES_ABOVE_2K[57]`
+  (k ∈ [8, 64]; ABOVE[64] is the `0` sentinel — no u64 prime > 2^64).
+- `tests/primality_pseudoprimes.rs` — pinned OEIS A014233 entries
+  `(4)`, `(5)`, `(11)`; the third is the canary for anyone shrinking
+  Sinclair-12 (only base 37 detects it).
+- `tests/table_cross_check.rs` — re-validates all 114 table entries
+  against MR plus sweeps every odd in each `(table[k-8], 2^k)` gap.
+  Runtime: ~milliseconds (the *gap* is small — typically ≤ 100 odds).
+- `benches/primality.rs` — four criterion benches per PRD §6.
+
+### Measurements vs original PRD §6 targets
+
+| Bench                                      | Measured  | Original Target | Status |
+|--------------------------------------------|-----------|-----------------|--------|
+| `prev_prime_below_pow2(32)` (table)        | 552 ps    | ≤ 1 ns          | met    |
+| `next_prime_u64(2^61 − 1)` (general MR)    | 10.97 µs  | ≤ 12 µs         | met    |
+| `next_prime_u64(arbitrary ≈ 1e9)`          | 2.23 µs   | ≤ 2 µs          | +11%   |
+| `is_prime_u64(u64::MAX − 58)` worst-case   | 15.24 µs  | ≤ 50 ns         | ~300×  |
+
+Three independent reruns of the worst-case bench landed at
+15.24 / 15.79 / 15.65 µs — stable within ±2%, not measurement noise.
+
+### Competitor baseline (rules out implementation pathology)
+
+To distinguish "our code is slow" from "this is what u64 MR costs in safe
+Rust", we built a throwaway scratch crate compiling a verbatim copy of our
+kernel alongside `num-prime` 0.4.4. Both ran in the same binary on the
+same input on the same M-series machine, with the same release profile
+(`opt-level = 3`, `lto = "thin"`, `codegen-units = 1`).
+
+| Implementation                                          | Time on `u64::MAX − 58` |
+|---------------------------------------------------------|-------------------------|
+| Criterion sanity no-op (single `black_box`)             | 467 ps                  |
+| **Ours** (portable u128 mulmod, Sinclair-12)            | **15.63 µs**            |
+| **`num-prime` 0.4.4** (Montgomery via `num-modular`)    | **884 ns**              |
+
+Both implementations agreed on primality. The 467 ps sanity baseline
+confirms criterion is reporting honestly. Conclusions:
+
+1. The 15.63 µs measurement is real, not a tooling artifact.
+2. There is a **17.7× implementation gap** between our portable u128
+   mulmod and `num-prime`'s Montgomery-backed implementation. This is
+   the single recoverable optimization in pure safe Rust.
+3. `num-prime` itself is **17.7× over the original 50 ns target**. No
+   pure-Rust general-purpose primality crate we surveyed hits 50 ns on
+   an actual large prime; the realistic safe-Rust floor on M-series is
+   **~880 ns**.
+4. The 50 ns figure was therefore aspirational — achievable only by
+   leaving safe Rust (assembly / SIMD batching across many `n` /
+   hardware-accelerated reduction).
+
+### Revised performance targets
+
+PRD §6 is amended in the same PR. The relevant row changes:
+
+| Operation                                  | M-series (was → now) | WASM (was → now) |
+|--------------------------------------------|----------------------|------------------|
+| `is_prime_u64(p)` worst-case               | 50 ns → **≤ 1 µs**   | 200 ns → **≤ 4 µs** |
+
+The new target tracks the measured `num-prime` floor (884 ns) with ~13%
+headroom for variance. All other §6 rows remain unchanged. The current
+15.24 µs implementation does not meet the new target either — Phase 0.1
+closes the gap (see below).
+
+### Phase 0.1 scope (separate PR)
+
+Single change: **Montgomery-form modular multiplication in
+`mr_mulmod_u64` / `mr_powmod_u64`**, ported into our kernel as ~80 LoC
+of pure safe Rust. Expected speedup 15-18× → lands at the ~880 ns floor.
+Validation: criterion bench requires mean ≤ 1.0 µs with `p < 0.01`
+vs the Phase 0 baseline. No change to the public API or the table /
+cross-check architecture.
+
+### Explicitly rejected from Phase 0.1
+
+- **The 7-witness "Sinclair" set** `{2, 325, 9375, 28178, 450775,
+  9780504, 1795265022}`. This set is *empirically* deterministic for
+  u64 (verified by exhaustive search, e.g. miller-rabin.appspot.com),
+  not theorem-proven the way the first-12-primes set is (Sorenson &
+  Webster 2015, deterministic to ψ₁₂ ≈ 3.19 × 10²³ ≈ 2^78; ~2^81 is the
+  13-prime bound). Trading textbook provenance
+  for a 1.7× speedup is a bad deal when Montgomery alone gives
+  15-18×. Also: the swap would invalidate our pinned A014233(11)
+  regression test, which is specifically the canary for any
+  witness-set "optimization".
+- **Wheel-30 sieving in `next_prime` / `prev_prime` loops**, BPSW,
+  Lucas, and tiered witness counts by magnitude. All sound but not
+  on the Phase 0.1 critical path. Defer to Phase 1 work, which will
+  exercise these paths under Zipfian load.
+
+### Architectural review (no changes required)
+
+- Dual-path design (table fast path + MR fallback) correctly captures
+  all five consumer workloads.
+- `tests/table_cross_check.rs` is sufficient as the source-of-truth gate;
+  the `0.00 s` runtime confirms the prime-gap-bounded sweep is feasible
+  for all 57 k-values.
+- `include!` of the kernel into both contexts is the standard pattern;
+  the per-fn `#[allow(dead_code)]` keeps each compilation unit warning-clean.
+- The `unstable-u128` 40-round probabilistic mode bound is sound:
+  `Pr[err] ≤ 4⁻⁴⁰ = 2⁻⁸⁰`.
+
+---
+
+## Notes for Reviewers
+
+This ADR's *creative* contribution is not Miller-Rabin itself (textbook,
+1976) — it is the observation that **one tiny utility unlocks five
+independently identified pathologies** across hashing, sharding, sketching,
+adjacency, and witnessing in a workspace that today has no primality
+infrastructure at all. The PRD goes deeper on each use-case; this ADR
+binds the architectural choices.
--- a/docs/research/miller-rabin-optimizations/GROK-REVIEW-REQUEST.md
+++ b/docs/research/miller-rabin-optimizations/GROK-REVIEW-REQUEST.md
@ -0,0 +1,424 @@
+# External Review Request — PIAL Phase 0 (Miller-Rabin Primality)
+
+You are an objective reviewer of a freshly-landed Phase-0 PR in a Rust workspace
+(`ruvector`). The PR adds a deterministic Miller-Rabin primality utility plus
+build-time prime tables. Two of four bench targets are met; one is missed by
+11% and one by ~300×. The team needs an objective plan that:
+
+1. Sanity-checks correctness (we may have blind spots).
+2. Proposes ranked optimizations for the missed target — with a *measurement
+   methodology* for each, not just claims.
+3. Identifies any architectural concerns we are missing.
+
+Constraints we cannot relax:
+- **Pure Rust, `core`-only.** No external prime/big-integer crates (`num-prime`,
+  `glass_pumpkin`, etc. were rejected in the binding ADR).
+- **Allocation-free, `no_std`-friendly.** Hot paths run in WASM bundles.
+- **Sinclair-12 witnesses are non-negotiable** for the deterministic u64 path
+  unless you can cite a smaller deterministic set proven for `n < 2^64`.
+- **Source-of-truth invariant**: build-time tables and runtime tests must be
+  generated by *the same* MR implementation. Don't propose schemes that fork
+  the truth source.
+
+---
+
+## 1. Binding context (ADR-151 summary)
+
+Five subsystems in a 60+-crate workspace need prime moduli (shard router, HNSW
+adjacency, sparsifier strides, mincut LSH, pi-brain witness chain). Today they
+all use `mod 2^k` and have documented pathologies. ADR-151 introduces *one*
+shared utility — `crates/ruvector-collections/src/primality.rs` — that all five
+will adopt across phases 1–5 (this PR is Phase 0 only: the utility itself).
+
+Design:
+- Deterministic MR for `u32` (ADR-specified witnesses {2, 7, 61}; as shipped
+  in §2.1, `mr_is_prime_u32` simply delegates to the `u64` path) and `u64`
+  (Sinclair-12: {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}).
+- Probabilistic MR for `u128` behind `--feature unstable-u128`, 40 rounds,
+  error `< 2⁻⁸⁰`.
+- **Dual path**: a build-time-generated table `PRIMES_BELOW_2K[57]` /
+  `PRIMES_ABOVE_2K[57]` covers k ∈ [8, 64] for power-of-two-aligned callers
+  (~1 ns table load); arbitrary inputs fall through to the general MR descent.
+- The table is generated by `build.rs` calling the *same* MR kernel that ships
+  at runtime (via `include!`). A `tests/table_cross_check.rs` re-validates
+  every entry under `cargo test`, so MR remains source of truth.
+
+Acceptance gates:
+1. `cargo test -p ruvector-collections primality` green, including pinned
+   pseudoprime regressions (OEIS A014233 entries 4, 5, 11).
+2. Table cross-check validates all 114 entries against MR.
+3. Bench targets met:
+   - `is_prime_u64` worst-case ≤ 50 ns (M-series), ≤ 200 ns (WASM)
+   - `prev_prime_below_pow2(k)` ≤ 1 ns (table)
+   - `next_prime_u64(arbitrary)` ≤ 2 µs
+   - `next_prime_u64(2^61)` ≤ 12 µs
+
+---
+
+## 2. The implementation as shipped
+
+### 2.1 Shared kernel — `src/primality_kernel.rs`
+
+`include!`d into both `build.rs` and `src/primality.rs`. Inner attributes are
+disallowed in `include!`d files, hence per-fn `#[allow(dead_code)]`.
+
+```rust
+// Deterministic Miller-Rabin kernel — ADR-151 (PIAL).
+//
+// `include!`d into two contexts (build.rs and src/primality.rs) which use
+// different subsets of the symbols. Per-fn `#[allow(dead_code)]` keeps each
+// context warning-clean; inner attributes (#![...]) aren't legal in
+// included files.
+
+#[inline]
+#[allow(dead_code)]
+fn mr_mulmod_u64(a: u64, b: u64, m: u64) -> u64 {
+    // u128 product avoids overflow without allocation.
+    ((a as u128).wrapping_mul(b as u128) % (m as u128)) as u64
+}
+
+#[inline]
+#[allow(dead_code)]
+fn mr_powmod_u64(mut base: u64, mut exp: u64, m: u64) -> u64 {
+    if m == 1 {
+        return 0;
+    }
+    let mut acc: u64 = 1;
+    base %= m;
+    while exp > 0 {
+        if exp & 1 == 1 {
+            acc = mr_mulmod_u64(acc, base, m);
+        }
+        exp >>= 1;
+        if exp > 0 {
+            base = mr_mulmod_u64(base, base, m);
+        }
+    }
+    acc
+}
+
+// Returns true iff `a` is a Miller-Rabin witness of compositeness for `n`.
+// Caller guarantees: n is odd, n > 3, and a in [2, n-2]. n - 1 = d * 2^s
+// with d odd (passed in pre-decomposed for speed).
+#[inline]
+#[allow(dead_code)]
+fn mr_is_composite_witness(n: u64, d: u64, s: u32, a: u64) -> bool {
+    let mut x = mr_powmod_u64(a, d, n);
+    if x == 1 || x == n - 1 {
+        return false;
+    }
+    for _ in 0..s.saturating_sub(1) {
+        x = mr_mulmod_u64(x, x, n);
+        if x == n - 1 {
+            return false;
+        }
+    }
+    true
+}
+
+#[inline]
+#[allow(dead_code)]
+fn mr_is_prime_u64(n: u64) -> bool {
+    if n < 2 {
+        return false;
+    }
+    const SMALL_PRIMES: [u64; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
+    for &p in &SMALL_PRIMES {
+        if n == p {
+            return true;
+        }
+        if n.is_multiple_of(p) {
+            return false;
+        }
+    }
+    // n is now odd, > 37, and coprime to every Sinclair witness.
+    let mut d = n - 1;
+    let mut s: u32 = 0;
+    while d & 1 == 0 {
+        d >>= 1;
+        s += 1;
+    }
+    for &a in &SMALL_PRIMES {
+        if mr_is_composite_witness(n, d, s, a) {
+            return false;
+        }
+    }
+    true
+}
+
+#[inline]
+#[allow(dead_code)]
+fn mr_is_prime_u32(n: u32) -> bool {
+    mr_is_prime_u64(n as u64)
+}
+
+#[inline]
+#[allow(dead_code)]
+fn mr_prev_prime_u64(upper: u64) -> u64 {
+    if upper <= 2 { return 0; }
+    if upper == 3 { return 2; }
+    let mut n = upper - 1;
+    if n & 1 == 0 { n -= 1; }
+    loop {
+        if mr_is_prime_u64(n) { return n; }
+        if n <= 3 { return 2; }
+        n -= 2;
+    }
+}
+
+#[inline]
+#[allow(dead_code)]
+fn mr_next_prime_u64(lower: u64) -> u64 {
+    if lower < 2 { return 2; }
+    if lower < 3 { return 3; }
+    let largest_u64_prime: u64 = u64::MAX - 58;
+    if lower >= largest_u64_prime { return 0; }
+    let mut n = lower + 1;
+    if n & 1 == 0 { n += 1; }
+    loop {
+        if mr_is_prime_u64(n) { return n; }
+        n += 2;
+    }
+}
+```
+
+### 2.2 Public API — relevant excerpts from `src/primality.rs`
+
+```rust
+include!("primality_kernel.rs");
+include!(concat!(env!("OUT_DIR"), "/prime_tables.rs"));
+// ↑ provides: pub const PRIMES_BELOW_2K: [u64; 57]
+//             pub const PRIMES_ABOVE_2K: [u64; 57]   (last entry = 0 sentinel)
+
+#[inline]
+pub fn is_prime_u32(n: u32) -> bool { mr_is_prime_u32(n) }
+
+#[inline]
+pub fn is_prime_u64(n: u64) -> bool { mr_is_prime_u64(n) }
+
+#[inline]
+pub fn prev_prime_below_pow2(k: u32) -> u64 {
+    debug_assert!((8..=64).contains(&k));
+    PRIMES_BELOW_2K[(k - 8) as usize]
+}
+
+#[inline]
+pub fn next_prime_above_pow2(k: u32) -> u64 {
+    debug_assert!((8..=63).contains(&k));
+    PRIMES_ABOVE_2K[(k - 8) as usize]
+}
+
+#[inline]
+pub fn prev_prime_u64(n: u64) -> u64 {
+    if n.is_power_of_two() {
+        let k = n.trailing_zeros();
+        if (8..=64).contains(&k) {
+            return PRIMES_BELOW_2K[(k - 8) as usize];
+        }
+    }
+    mr_prev_prime_u64(n)
+}
+
+#[inline]
+pub fn next_prime_u64(n: u64) -> u64 {
+    if n.is_power_of_two() {
+        let k = n.trailing_zeros();
+        if (8..=63).contains(&k) {
+            return PRIMES_ABOVE_2K[(k - 8) as usize];
+        }
+    }
+    mr_next_prime_u64(n)
+}
+
+#[inline]
+pub fn ephemeral_prime(seed: u64) -> u64 {
+    // Used by pi-brain witness chain (ADR §4.4) for per-share entropy.
+    let mask = (1u64 << 61) - 1;
+    let s = (seed | 1) & mask;
+    if mr_is_prime_u64(s) { s } else { mr_next_prime_u64(s) }
+}
+
+// u128 probabilistic mode (cfg-gated on `unstable-u128`):
+#[cfg(feature = "unstable-u128")]
+pub fn is_prime_u128(n: u128, rounds: u8) -> bool { /* … 40-round MR with
+    a tiny seeded LCG for witness selection; deferred to mr_is_prime_u64
+    when n <= u64::MAX */ }
+```
+
+### 2.3 Tests asserting correctness
+
+- `tests/primality_pseudoprimes.rs`:
+  - `is_prime_u64(3_215_031_751) == false` (OEIS A014233(4), spsp to {2,3,5,7})
+  - `is_prime_u64(2_152_302_898_747) == false` (A014233(5))
+  - `is_prime_u64(3_825_123_056_546_413_051) == false` (A014233(11), detected
+    only by base 37 — canary for anyone shrinking Sinclair-12)
+  - All primes/composites in [0, 100], 7 Carmichael numbers, edges
+    (0, 1, u64::MAX, u64::MAX − 58, largest u32 prime).
+- `tests/table_cross_check.rs`:
+  - For each k ∈ [8, 64]: assert `is_prime_u64(PRIMES_BELOW_2K[k-8])` and
+    sweep every odd integer in `(table[k-8], 2^k)` asserting non-primality.
+  - Symmetric for k ∈ [8, 63] on `PRIMES_ABOVE_2K`.
+  - Sentinel: `PRIMES_ABOVE_2K[64-8] == 0`.
+
+---
+
+## 3. Measurements (criterion, M-series, release profile)
+
+### 3.1 Phase-0 benches against the PRD targets
+
+| Bench                                      | Measured  | Target  | Status |
+|--------------------------------------------|-----------|---------|--------|
+| `prev_prime_below_pow2(32)` (table)        | 552 ps    | ≤ 1 ns  | green  |
+| `next_prime_u64(2^61 − 1)` general MR      | 10.97 µs  | ≤ 12 µs | green  |
+| `next_prime_u64(arbitrary ≈ 1e9)` general  | 2.23 µs   | ≤ 2 µs  | +11%   |
+| `is_prime_u64(u64::MAX − 58)` worst-case   | **15.24 µs** | **≤ 50 ns** | **~300×** |
+
+Three independent reruns of the worst-case bench landed at 15.24 / 15.79 /
+15.65 µs — stable within ±2%, not measurement noise.
+
+### 3.2 Apples-to-apples competitor baseline
+
+To rule out "this machine is slow today" or "criterion is mismeasuring", we
+built a throwaway scratch crate (outside the workspace) that compiles a
+verbatim copy of our kernel alongside `num-prime` 0.4.4. Both run in the
+same binary on the same input, with the same release profile
+(`opt-level = 3`, `lto = "thin"`, `codegen-units = 1`).
+
+| Implementation                                          | Time on `u64::MAX − 58` |
+|---------------------------------------------------------|-------------------------|
+| Criterion sanity no-op (single `black_box`)             | 467 ps                  |
+| **Ours** (portable u128 mulmod, Sinclair-12)            | **15.63 µs**            |
+| **`num-prime` 0.4.4** (Montgomery via `num-modular`)    | **884 ns**              |
+| PRD §6 target                                           | 50 ns                   |
+
+Both implementations agreed on primality (no correctness gap). The 467 ps
+sanity baseline confirms criterion is reporting honestly — broken benches
+don't produce 467 ps for a no-op.
+
+**What this tells us:**
+
+1. **Our 15.63 µs is real and reproducible**, not a measurement artifact.
+2. **We are ~17.7× slower than `num-prime`** on the same input. The
+   delta is almost certainly Montgomery-form modular multiplication
+   (`num-prime` pulls `num-modular`, which provides exactly that).
+3. **`num-prime` itself is ~17.7× slower than the 50 ns target.** No
+   pure-Rust general-purpose primality crate we know of hits 50 ns on an
+   actual large prime; the realistic safe-Rust floor on M-series appears
+   to be ~880 ns.
+4. The PRD's 50 ns figure is therefore *unachievable* in safe Rust — it
+   would require Montgomery + assembly / SIMD batching across many `n` /
+   leaving the safe subset entirely.
+
+ADR-151 forbids `num-prime` as a *runtime* dependency, but does not forbid
+us from porting Montgomery into our own kernel — `num-modular` is
+MIT/Apache and the technique itself is textbook. That is now a *measured*
+optimization target with a known ceiling, not a guess.
+
+---
+
+## 4. What we are asking you to do
+
+Produce **one document** with the four sections below. Be specific. Cite
+sources where possible. Do not propose changes that would violate the
+constraints in the preamble.
+
+### Section A — Correctness audit
+
+Read §2.1 and §2.2. Identify:
+1. Any soundness bug (a composite that would be reported prime, or vice
+   versa) within the documented input ranges.
+2. Edge cases not covered by the tests in §2.3 that you would add.
+3. Any way the table cross-check could pass while masking a real bug
+   (i.e. is the test actually load-bearing?).
+4. Risks specific to `ephemeral_prime`'s seed → prime mapping when used
+   for witness-chain fingerprinting (ADR §4.4): collisions, attacker
+   precomputation, distribution issues.
+
+### Section B — Performance plan, ranked
+
+The sharpened goal, given §3.2's competitor baseline: **close the 17.7×
+gap to `num-prime` (15.63 µs → ~880 ns) in pure safe Rust, without taking
+`num-prime` or `num-modular` as a runtime dependency**, AND hit the
+`next_prime_u64` arbitrary 2 µs target. Treat 50 ns as aspirational; we
+expect you to recommend a revised PRD target with justification.
+
+For each proposal:
+- **Mechanism**: what changes in code (one paragraph, no hand-waving;
+  reference §2.1 line ranges where applicable).
+- **Expected speedup vs our 15.63 µs baseline**: cite source or give a
+  back-of-envelope; if the technique is what `num-prime` uses, say so.
+- **Cost**: code complexity (LoC, conceptual difficulty for reviewers),
+  WASM bundle size, any new `unsafe`.
+- **Compatibility**: does it preserve the source-of-truth invariant
+  (build.rs and runtime use the same kernel via `include!`)? Does it
+  break `no_std`?
+- **Validation methodology**: the *exact* benchmark and regression test
+  you would add to prove the speedup is real and stable, including
+  the criterion config you would use and the statistical threshold for
+  declaring "passed".
+
+Rank proposals by `expected_speedup × feasibility / complexity`.
+
+Candidate techniques to consider (extend or reject as you see fit):
+- **Montgomery-form modular arithmetic** — likely the single biggest
+  lever based on the `num-prime` comparison. We want a concrete sketch
+  of the API change and a LoC estimate for porting it into our kernel.
+- Wheel factorization (mod 30 / mod 210) for the small-prime screen.
+- Branchless witness loops.
+- Reduced witness sets for sub-ranges (e.g. {2} for n < 2047,
+  {2, 3} for n < 1,373,653 ≈ 1.37 × 10^6).
+- Strong-base early-exit ordering (which witness fails fastest on
+  random composites?).
+- BPSW (Baillie-PSW) instead of MR — different correctness story; we'd
+  need a citation for deterministic-up-to-2^64 status.
+- Strong Lucas as a deterministic add-on.
+- Pre-screen by Pollard rho for small-factor composites (does this even
+  beat trial division for the tiny gap between 37 and our actual call
+  rate?).
+
+Specifically address:
+1. **Realistic safe-Rust floor for `is_prime_u64` worst-case on M-series.**
+   Our measurement suggests ~880 ns (matching `num-prime`). Confirm or
+   refute, with reasoning.
+2. **Recommended revised PRD target**, given that floor.
+3. **The `next_prime_u64(arbitrary)` 2.23 µs vs 2 µs gap** — is this
+   meaningful or noise-band? If real, what closes it?
+
+### Section C — Architectural review
+
+1. Is the dual-path design (table + MR fallback) correctly capturing the
+   workload of the five named consumers (shard router, HNSW buckets,
+   sparsifier, mincut LSH, witness chain)? Any consumer where the table
+   would mislead?
+2. Is `tests/table_cross_check.rs` sufficient as the source-of-truth gate,
+   or is there a stronger invariant we should assert?
+3. Does `include!` of the kernel into both `build.rs` and `src/primality.rs`
+   create any failure mode you have seen burn other projects?
+4. The `unstable-u128` feature uses Russian-peasant `mulmod_u128` and a
+   tiny seeded LCG for witness selection. Is that sound for the
+   probabilistic guarantee `Pr[err] < 2⁻⁸⁰` at 40 rounds?
+
+### Section D — Validation methodology
+
+For the *whole* Phase-0 deliverable, propose:
+1. The minimum set of CI gates that would catch a regression in either
+   correctness or performance, and where they should run (PR / nightly /
+   release).
+2. A reproducible benchmark harness that distinguishes signal from noise
+   on contended hardware (criterion is fine; what statistical thresholds
+   would you set for "pass"?).
+3. A property-test (proptest/quickcheck-style) strategy that would
+   complement the pinned regressions in §2.3 without re-deriving MR.
+4. Anything you would add to the `tests/` or `benches/` directory before
+   merging Phase 0.
+
+---
+
+## 5. Format of your response
+
+Plain markdown. Sections A/B/C/D headed exactly as above. For Section B,
+use a table sorted by your ranking. End with a one-paragraph **Verdict**:
+should the PR merge as-is, merge with the PRD §6 row relaxed, or block
+on a specific change?
+
+Do not be polite. If a proposal in our implementation is wrong, say so
+directly with line-numbered references into §2.1 / §2.2.
--- a/docs/research/miller-rabin-optimizations/HANDOFF.md
+++ b/docs/research/miller-rabin-optimizations/HANDOFF.md
@ -0,0 +1,113 @@
+# Handoff — Phase 0 Kickoff (PIAL)
+
+You are starting **Phase 0** of PIAL (Prime-Indexed Acceleration Layer):
+land the Miller-Rabin primality utility in `ruvector-collections` and
+nothing else. Five integration phases follow in separate PRs.
+
+## Read first (in order)
+
+1. **`docs/adr/ADR-151-miller-rabin-prime-optimizations.md`** — the binding
+   decision (status, scope, acceptance criteria, alternatives rejected).
+2. **`docs/research/miller-rabin-optimizations/PRD.md`** — full design,
+   five creative use-cases, performance targets, six-phase rollout, risks.
+3. **This file** — Phase 0 specifics. Do not skip.
+
+## Branch
+
+`feat/miller-rabin-prime-optimizations` (off `main`). Already created.
+
+## Target crate
+
+`crates/ruvector-collections/` already exists in the workspace. Today it
+contains `collection.rs`, `error.rs`, `lib.rs`, `manager.rs`. No
+`benches/` directory and no `build.rs` yet — both are Phase 0 work.
+
+## Phase 0 Deliverables (four files, one PR)
+
+| File | Purpose | Source of truth |
+|---|---|---|
+| `src/primality.rs` | Deterministic Miller-Rabin for u32/u64; probabilistic for u128; tabled `prev_prime_below_pow2` / `next_prime_above_pow2` fast paths; general `prev_prime_u64` / `next_prime_u64` MR-descent paths; `ephemeral_prime(seed)` for the witness chain | PRD §5 |
+| `build.rs` | Generate `PRIMES_BELOW_2K[57]` and `PRIMES_ABOVE_2K[57]` (k ∈ [8, 64]) from the MR implementation at compile time; emit as `${OUT_DIR}/prime_tables.rs` for `include!`-inclusion in `primality.rs` | ADR-151 "Generation Strategy" |
+| `benches/primality.rs` | Criterion benches: `is_prime_u64`, `prev_prime_below_pow2`, `next_prime_u64(arbitrary)`, `next_prime_u64(2^61)`. Targets in PRD §6 | PRD §6 |
+| `tests/table_cross_check.rs` | For every k ∈ [8, 64], assert `is_prime_u64(PRIMES_BELOW_2K[k-8])` is true and that no prime exists in `(PRIMES_BELOW_2K[k-8], 2^k)`. Same for `_ABOVE_`. This is the gate that makes MR the source of truth | ADR-151 acceptance #2 |
+
+## Library wiring
+
+Add `pub mod primality;` to `crates/ruvector-collections/src/lib.rs` and
+re-export the public API at the crate root. Update the crate-level
+doc-comment to mention the new module.
+
+## Dependencies — explicitly do not add
+
+The PRD rejects `num-prime`, `glass_pumpkin`, and any other external
+prime/big-integer crates. Use **only** `core` integer arithmetic.
+Add `criterion` under `[dev-dependencies]` for benches if it is not
+already inherited via the workspace.
+
+## Witnesses (the whole correctness story in three lines)
+
+- `u32`: `{ 2, 7, 61 }` — Jaeschke (1993); valid for every n < 4,759,123,141, which covers all of `u32`. Deterministic.
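+- `u64`: `{ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37 }` — the first 12 primes (the set the PRD labels "Sinclair-12"); proven sufficient for every n < 3.18 × 10²³ (Sorenson & Webster, "Strong pseudoprimes to twelve prime bases"), which covers all of `u64`. Deterministic.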
+- `u128`: 40 random rounds, **only** behind `--features unstable-u128`. Probabilistic, error < 2⁻⁸⁰.
+
+## Pinned pseudoprime regressions
+
+Include these in `tests/primality_pseudoprimes.rs` so future witness-set
+"optimizations" cannot silently regress correctness:
+
+- `3_215_031_751` — strong pseudoprime to bases {2, 3, 5, 7} (must be detected by Sinclair-12).
+- `2_152_302_898_747` — strong pseudoprime to {2, 3, 5, 7, 11}.
+- `3_825_123_056_546_413_051` — smallest strong pseudoprime to the first nine prime bases {2, 3, 5, 7, 11, 13, 17, 19, 23}; large 64-bit known-hard composite.
+
+Add small-prime sanity (1, 2, 3, 4, 5, 7, 9, ..., 100) and edge cases
+(0, 1, `u64::MAX`, `u64::MAX - 58` which is the largest u64 prime).
+
+## Performance targets (from PRD §6)
+
+| Operation | M-series | WASM |
+|---|---|---|
+| `is_prime_u64` worst-case | ≤ 50 ns | ≤ 200 ns |
+| `prev_prime_below_pow2(k)` (table) | ≤ 1 ns | ≤ 2 ns |
+| `next_prime_u64(2^32)` (table) | ≤ 1 ns | ≤ 2 ns |
+| `next_prime_u64(arbitrary N)` (general MR) | ≤ 2 µs | ≤ 8 µs |
+| `next_prime_u64(2^61)` (general MR) | ≤ 12 µs | ≤ 40 µs |
+
+## Phase 0 is "Done" when
+
+ADR-151 acceptance criteria #1, #2, #3 are all green:
+
+1. `cargo test -p ruvector-collections primality` passes (includes pinned pseudoprimes).
+2. `cargo test -p ruvector-collections --test table_cross_check` validates all 114 table entries against MR.
+3. `cargo bench -p ruvector-collections --bench primality` meets the targets above on M-series.
+
+**Do not start Phase 1 in this PR.** Phases ship as separate PRs
+(PRD §7). Keep this one tightly scoped to the utility itself.
+
+## First commands in the new session
+
+```bash
+# Confirm you are on the right branch
+git status   # should show "On branch feat/miller-rabin-prime-optimizations" with no changes
+
+# Baseline — confirm the crate compiles before you touch it
+cargo check -p ruvector-collections
+
+# Re-read the binding documents
+cat docs/adr/ADR-151-miller-rabin-prime-optimizations.md | head -80
+cat docs/research/miller-rabin-optimizations/PRD.md | sed -n '150,260p'   # §5 API + §6 perf
+```
+
+Then start with `crates/ruvector-collections/src/primality.rs`. The
+deterministic u64 Miller-Rabin is ~80 lines including comments;
+everything else (tables via `build.rs`, benches, cross-check test)
+follows mechanically from it.
+
+## What is explicitly **not** Phase 0
+
+- Editing `crates/ruvector-graph/` (that's Phase 1).
+- Editing any HNSW crate (Phase 2).
+- Editing sparsifier or attn-mincut (Phase 3).
+- Editing `crates/mcp-brain-server/` or pi-brain payloads (Phase 4).
+- Editing CNN / quantization codebooks (Phase 5).
+
+If you find yourself touching any of those, stop and split the PR.
--- a/docs/research/miller-rabin-optimizations/PRD.md
+++ b/docs/research/miller-rabin-optimizations/PRD.md
@ -0,0 +1,369 @@
+# PRD: Prime-Indexed Acceleration Layer (PIAL)
+
+> Creative Miller-Rabin–driven optimizations for ruvector's hashing,
+> sharding, sketching, and witness-chain layers.
+
+| Field              | Value                                                |
+|--------------------|------------------------------------------------------|
+| **Status**         | Draft                                                |
+| **Date**           | 2026-04-16                                           |
+| **Owner**          | RuVector Core / Architecture                         |
+| **Related ADR**    | ADR-151 (this PRD's binding decision record)         |
+| **Cross-refs**     | ADR-027 (HNSW), ADR-038 (witness), ADR-058 (hash),   |
+|                    | ADR-148/149 (brain perf), ADR-150 (π-brain)          |
+| **Tier (ADR-026)** | T1 (Agent Booster eligible) for the core utility;    |
+|                    | T2 (Haiku) for the integration patches.              |
+
+---
+
+## 1. Background
+
+Three years of incremental work have left ruvector with several places where
+**arithmetic on indices, hashes, and shard keys defaults to power-of-two
+moduli** — convenient on hardware (`x & (N - 1)`), pathological on real data:
+
+| Site                                              | Current modulus    | Failure mode                                               |
+|---------------------------------------------------|--------------------|------------------------------------------------------------|
+| `ruvector-graph` shard router (ADR-058 #6)        | `xxh3_64() mod 2^k`| ~50% collision @ 2³² nodes; biased on Zipfian keys         |
+| `micro-hnsw-wasm` adjacency map                   | open-addressed 2^k | clustering on near-duplicate vectors (e.g. timestamps)     |
+| `ruvector-sparsifier` stride sampler              | power-of-2 stride  | aliasing on lattice / image-grid graphs                    |
+| `ruvector-attn-mincut` LSH sketch                 | ad-hoc constant    | breaks 2-independence of universal hash family             |
+| pi-brain witness fingerprint (ADR-038)            | XXH3 only          | single-hash tamper risk; no per-share entropy              |
+
+The fix in every one of these is **the same primitive**: a fast, deterministic
+primality test that lets us mint a prime *near a target size* on demand.
+
+We choose **Miller-Rabin** because it is:
+
+- **Deterministic** for all `u64` inputs with the Sinclair witness set
+  `{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` — no probabilistic guarantees
+  needed for our hot paths.
+- **O(k · log³ n)** — a `next_prime(2^32)` call costs ~2 µs in benchmarks;
+  amortized to zero against shard-rebalance cycles.
+- **WASM-friendly** — pure integer arithmetic, no FFI, fits in <1 KB compiled.
+- **Tier-1 eligible** under ADR-026 — pure transform work, no LLM in the loop.
+
+This PRD frames a single, surgically scoped utility (`primality.rs`) that
+*unblocks* a portfolio of creative optimizations across the workspace. The
+binding architectural commitments live in ADR-151.
+
+---
+
+## 2. Goals
+
+| # | Goal                                                                 | Metric / Acceptance                                  |
+|---|----------------------------------------------------------------------|------------------------------------------------------|
+| G1| Provide `is_prime`, `next_prime`, `prev_prime` over `u32`/`u64`     | Deterministic, ≥ 200 M ops/s on M-series             |
+| G2| Re-shard ruvector-graph by prime modulus                             | ≥ 30% reduction in shard-load std-dev on Zipfian load|
+| G3| Convert HNSW adjacency tables to prime-bucket open addressing        | ≥ 15% drop in p99 insert latency at 1 M vectors      |
+| G4| Replace LSH stride/modulus constants with certified primes           | Restore 2-independence; pass property tests          |
+| G5| Add per-share ephemeral prime fingerprint to π-brain witness chain   | +8 bytes/share; published in `brain_share` payload   |
+| G6| Cross-target: the utility compiles for native, WASM, and `no_std`    | Single crate, no feature-flag explosion              |
+
+## 3. Non-Goals
+
+- **No cryptographic key generation.** Miller-Rabin alone is *not* a substitute
+  for proven-prime generation in RSA/ECC; we only use it for hashing/sharding.
+- **No new heap allocations** in the inner loop — the utility must be
+  allocation-free past the (constant-size) witness array.
+- **No replacement** of `prime-radiant` (which is a coherence-gate crate and
+  unrelated despite the name collision).
+- **No big-integer support.** 64-bit (and an opt-in `u128` mode) is enough for
+  every ruvector use case identified above.
+- **No SHAKE/HMAC redesign.** ADR-058's other findings stand independently.
+
+---
+
+## 4. Creative Use-Cases (the "why this is interesting")
+
+### 4.1 Prime-Modulus Shard Routing — *direct fix for ADR-058 #6*
+
+Today's shard router is `xxh3_64(node_id) & (shards - 1)`. The mask discards
+all but `log₂(shards)` bits of entropy, which is exactly when adversarial /
+Zipfian inputs cluster. Replacing it with `xxh3_64(node_id) % p`, where
+`p = prev_prime(shards)`, recovers full entropy and gives provably balanced
+buckets under universal hashing.
+
+> **Creative twist:** because `prev_prime(k)` is cheap, we can *adapt* the
+> modulus during a rolling re-shard (every N minutes) — the cluster never
+> sees a power-of-two pathology because the modulus literally never *is* a
+> power of two for two consecutive epochs.
+
+### 4.2 Prime-Bucket HNSW Adjacency
+
+`micro-hnsw-wasm` and `ruvector-hyperbolic-hnsw` store edges in open-addressed
+tables sized to the next power of two. Probe-sequence collisions on
+near-duplicate vectors (e.g. real-time sensor or timestamp embeddings) blow up
+p99 insert latency. Switching to `prev_prime(2^k)` capacity with linear or
+quadratic probing keeps the table size cache-friendly while breaking the
+power-of-two clustering.
+
+### 4.3 Certified Modulus for Universal LSH
+
+Several sketch modules (`ruvector-attn-mincut`, sparsifier samplers) build
+hash families of the form `((a · x + b) mod p) mod m`. The 2-independence
+guarantee *requires* `p` to be prime and `> universe_size`. Today these are
+hand-picked Mersenne-shaped constants (`2^61 − 1`, `2^31 − 1`); when the
+universe grows past those bounds the family silently degrades. Miller-Rabin
+lets us call `next_prime(universe_size)` on dataset load and store the chosen
+modulus alongside the index.
+
+### 4.4 Witness-Chain Ephemeral Primes (π-brain)
+
+The pi-brain witness chain (ADR-038, CLAUDE.md "Witness Chain Rules")
+currently fingerprints each shared memory with XXH3 only. We propose:
+
+```text
+share = { payload, fingerprint_xxh3, ephemeral_prime q, fingerprint_modq }
+        where q = next_prime( seed = SHA256(payload)[0..8] )
+```
+
+A tampering peer attempting to substitute payloads must collide *both*
+fingerprints — including a hash modulo a prime `q` they cannot precompute,
+because `q` is derived per-share. Cost: 8 bytes on the wire, ~2 µs at the
+sender, ~50 ns at every verifier. The asymmetry is the point.
+
+### 4.5 Anti-Aliasing Stride for Sparsifier Sampling
+
+Spectral sparsifiers in `ruvector-sparsifier` use stride-based subsampling
+when the importance sketch is too expensive. Power-of-two strides alias
+brutally on grid-structured graphs (image, mesh, lattice). A prime stride
+breaks the alignment for the same reason linear-congruential generators
+demand prime moduli — borrowed wisdom, decades old, free to reuse.
+
+### 4.6 Bonus: Prime-Sized Quantization Codebooks
+
+Product-quantization codebooks (used by ruvector-cnn-wasm and ruQu) sized to
+prime cardinalities show measurably better recall@k on standard benchmarks
+than power-of-two codebooks because they break the implicit "code-of-codes"
+correlation across sub-spaces. This is an opt-in mode, not a default.
+
+---
+
+## 5. Proposed Architecture
+
+```
+┌──────────────────────────────────────────────────────────────┐
+│  crates/ruvector-collections/src/primality.rs  (new, ~250 LoC) │
+│                                                                │
+│   pub fn is_prime_u32(n: u32) -> bool         // {2,7,61}     │
+│   pub fn is_prime_u64(n: u64) -> bool         // Sinclair-12  │
+│   pub fn is_prime_u128(n: u128, k: u8) -> bool // probabilistic│
+│   pub fn next_prime_u64(n: u64) -> u64                        │
+│   pub fn prev_prime_u64(n: u64) -> u64                        │
+│   pub fn ephemeral_prime(seed: u64) -> u64    // for §4.4     │
+│                                                                │
+│   #[cfg(target_arch = "wasm32")] // shares same impl          │
+└──────────────────┬───────────────────────────┬────────────────┘
+                   │                           │
+        ┌──────────┴──────────┐      ┌─────────┴───────────┐
+        ▼                     ▼      ▼                     ▼
+  shard router          HNSW buckets   LSH families     witness chain
+  (ruvector-graph)      (micro-hnsw)   (sparsifier,     (mcp-brain-server,
+                                        attn-mincut)     pi-brain)
+```
+
+### Why `ruvector-collections`?
+
+- It already houses cross-cutting data-structure utilities.
+- All five consumers depend on it transitively, so no new edges in the
+  dependency graph.
+- Keeps the workspace top-level crate count flat (we have 60+ already).
+
+### Public API (sketch)
+
+```rust
+//! crates/ruvector-collections/src/primality.rs
+//!
+//! Deterministic Miller-Rabin primality for u32/u64 and probabilistic
+//! Miller-Rabin for u128. Allocation-free, no_std-friendly.
+//!
+//! Hot-path strategy: tabled primes for the common power-of-two-aligned
+//! sizes (zero runtime cost), Miller-Rabin descent as the general fallback.
+
+#[inline]
+pub const fn is_prime_u32(n: u32) -> bool { /* witnesses: 2, 7, 61 */ }
+
+#[inline]
+pub const fn is_prime_u64(n: u64) -> bool {
+    // Sinclair (2011): deterministic for all u64
+    // witnesses: 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37
+}
+
+pub fn is_prime_u128(n: u128, rounds: u8) -> bool { /* probabilistic */ }
+
+// ── Generation: dual-path ────────────────────────────────────────────
+//
+// Fast path: lookup table for "largest prime < 2^k", k ∈ [8, 64].
+// CI validates every entry against the Miller-Rabin descent at build
+// time, so the table is never the source of truth — MR is.
+const PRIMES_BELOW_2K: [u64; 57] = [
+    251,                          // < 2^8
+    509,                          // < 2^9
+    1021,                         // < 2^10
+    // ... entries for k = 11..=31 ...
+    4_294_967_291,                // < 2^32  (shard-router common case)
+    // ... entries for k = 33..=63 ...
+    18_446_744_073_709_551_557,   // < 2^64
+];
+
+#[inline]
+pub const fn prev_prime_below_pow2(k: u32) -> u64 {
+    debug_assert!(k >= 8 && k <= 64);
+    PRIMES_BELOW_2K[(k - 8) as usize]
+}
+
+#[inline]
+pub fn prev_prime_u64(n: u64) -> u64 {
+    // Fast path: power-of-two-aligned inputs (HNSW buckets, shard sizes)
+    if n.is_power_of_two() && n.trailing_zeros() >= 8 {
+        return prev_prime_below_pow2(n.trailing_zeros());
+    }
+    // General path: 6k±1 wheel + Miller-Rabin descent
+    miller_rabin_descent(n, Direction::Down)
+}
+
+#[inline]
+pub fn next_prime_u64(n: u64) -> u64 {
+    if n.is_power_of_two() && n.trailing_zeros() >= 8 {
+        // Symmetric optional fast path: PRIMES_ABOVE_2K table
+        return next_prime_above_pow2(n.trailing_zeros());
+    }
+    miller_rabin_descent(n, Direction::Up)
+}
+
+pub fn ephemeral_prime(seed: u64) -> u64 {
+    // seed → next_prime((seed | 1) % 2^61) — used by witness chain (§4.4)
+    // No table — input is unpredictable by design.
+}
+```
+
+### Why the dual-path matters
+
+Three of PIAL's five generation sites (shard router, HNSW bucket sizing,
+sparsifier strides) ask for primes near *fixed* sizes that never change
+between releases. The table converts those calls into a single L1-cached
+load — no Miller-Rabin work at runtime at all.
+
+The two unpredictable sites (LSH universe, witness-chain ephemeral primes)
+fall through to the general MR path. They're cold paths anyway —
+microsecond-scale generation cost is invisible against the surrounding work.
+
+**Crucially, MR is still the source of truth.** A `build.rs` script
+regenerates `PRIMES_BELOW_2K` and `PRIMES_ABOVE_2K` from the MR
+implementation on every build, and a `#[test]` cross-checks every entry
+under `cargo test`. The table is an *amortization*, not a substitute.
+
+| Generation site             | Path taken         | Runtime cost |
+|-----------------------------|--------------------|--------------|
+| Shard router (`prev_prime(2^k)`)  | Fast (table)       | ~1 ns        |
+| HNSW bucket (`prev_prime(2^k)`)   | Fast (table)       | ~1 ns        |
+| Sparsifier stride (table-friendly)| Fast (table)       | ~1 ns        |
+| LSH modulus (`next_prime(N)`)     | General (MR)       | ~2 µs        |
+| Witness ephemeral (`next_prime(seed)`)| General (MR)   | ~2–12 µs     |
+
+---
+
+## 6. Performance Targets
+
+> **Revised 2026-04-16 (Phase 0).** The original `is_prime_u64` worst-case
+> target of 50 ns was found to be unachievable in pure safe Rust;
+> `num-prime` itself measures ~880 ns on the same hardware. Target relaxed
+> to track the empirical safe-Rust ceiling. See §6.1 and the Phase 0
+> Findings section of ADR-151 for the full justification.
+
+| Operation                                      | Target (M-series)   | Target (WASM)      |
+|------------------------------------------------|---------------------|--------------------|
+| `is_prime_u64(p)` (worst-case)                 | **≤ 1 µs** *(was 50 ns)* | **≤ 4 µs** *(was 200 ns)* |
+| `prev_prime_below_pow2(k)` (table fast path)   | **≤ 1 ns**          | **≤ 2 ns**         |
+| `next_prime_u64(2^32)` (table fast path)       | **≤ 1 ns**          | **≤ 2 ns**         |
+| `next_prime_u64(arbitrary N)` (general MR path)| ≤ 2 µs              | ≤ 8 µs             |
+| `next_prime_u64(2^61)` (general MR path)       | ≤ 12 µs             | ≤ 40 µs            |
+| Shard re-route on 1 M nodes                    | ≤ 30 ms (one-shot)  | n/a                |
+| HNSW p99 insert @ 1 M vectors                  | -15% vs baseline    | -10% vs baseline   |
+| WASM bundle growth from `PRIMES_BELOW_2K`+`_ABOVE_2K` | n/a          | ≤ 1 KB total       |
+
+Benchmarks live in `crates/ruvector-collections/benches/primality.rs` and run
+under existing `npm run bench` infrastructure.
+
+### 6.1 Empirical findings (Phase 0)
+
+Phase 0 measurements on M-series, criterion release profile:
+
+| Bench                                      | Measured  | Revised target | Status |
+|--------------------------------------------|-----------|----------------|--------|
+| `prev_prime_below_pow2(32)`                | 552 ps    | ≤ 1 ns         | met    |
+| `next_prime_u64(2^61 − 1)`                 | 10.97 µs  | ≤ 12 µs        | met    |
+| `next_prime_u64(arbitrary ≈ 1e9)`          | 2.23 µs   | ≤ 2 µs         | +11%   |
+| `is_prime_u64(u64::MAX − 58)` worst-case   | 15.24 µs  | ≤ 1 µs         | does not meet revised target — Phase 0.1 |
+
+A throwaway scratch crate compiling a verbatim copy of our kernel
+alongside `num-prime` 0.4.4 in the same binary on the same input
+measured **ours = 15.63 µs, num-prime = 884 ns** (criterion sanity no-op
+= 467 ps confirms harness honesty). The 17.7× gap is recoverable in pure
+safe Rust by porting Montgomery-form modular multiplication into
+`mr_mulmod_u64` / `mr_powmod_u64` (~80 LoC). That is Phase 0.1 scope and
+ships in a separate PR; see ADR-151 "Phase 0 Findings" for the full plan
+and the explicit rejection of the empirical 7-witness "Sinclair" set as
+a correctness regression dressed as a perf win.
+
+---
+
+## 7. Rollout Plan
+
+| Phase | Scope                                                                   | Gate                                       |
+|-------|-------------------------------------------------------------------------|--------------------------------------------|
+| **0** | Land `primality.rs` + tests + benches in `ruvector-collections`         | `npm test && npm run lint` green           |
+| **1** | Wire `next_prime` into ruvector-graph shard router behind feature flag  | A/B Zipfian load; ≥ 30% std-dev reduction  |
+| **2** | Convert HNSW adjacency to prime buckets (micro-hnsw-wasm first)         | recall@k unchanged; p99 insert -15%        |
+| **3** | Switch sparsifier + attn-mincut LSH families to certified primes        | property tests pass; no regression in cuts |
+| **4** | Ship ephemeral-prime fingerprint in pi-brain witness payload (opt-in)   | `brain_share` accepts new field; verifiers |
+|       |                                                                         | tolerant of absence (backward compatible)  |
+| **5** | Optional: prime-sized PQ codebooks in ruvector-cnn-wasm                 | recall@10 ≥ baseline on SIFT-1M            |
+
+Each phase is a separate PR; no big-bang merge.
+
+---
+
+## 8. Risks & Mitigations
+
+| Risk                                                            | Mitigation                                                     |
+|-----------------------------------------------------------------|----------------------------------------------------------------|
+| Modulo-by-prime is a *division*, slower than mask               | Use Lemire's `fastmod` (one mul + one shift) — already in tree |
+| Sinclair witness set has subtle bugs in edge cases (n < 9)      | Hard-code small-prime fast path + 100% branch coverage tests   |
+| WASM `u128` codegen is ~5× slower than native                   | u128 mode is opt-in; default paths are u64                     |
+| Cluster mid-flight reshard exposes intermediate state           | Phase 1 ships behind `--features prime-shard`; rollout is gated |
+| Witness-chain change breaks older pi-brain peers                | New field is `Option<…>`; verifiers ignore-on-absent           |
+| "Yet another collections crate" sprawl                          | All work lives in *existing* `ruvector-collections`            |
+
+---
+
+## 9. Open Questions
+
+1. Should `next_prime_u64` accept a *budget* (max-distance) and return
+   `Option<u64>` instead of looping unbounded? (Probably yes.)
+2. Do we want a `PrimeModHash<H>` newtype wrapper that auto-applies fastmod,
+   or expose `prev_prime` and let callers compose? (Lean: wrapper.)
+3. Does the witness-chain ephemeral prime need to be authenticated under the
+   sender's key, or is per-share derivation from `SHA256(payload)` enough?
+   (Defer to security review during Phase 4.)
+
+---
+
+## 10. Out of Scope (deliberately)
+
+- Big-integer / arbitrary-precision Miller-Rabin (use `num-bigint` if ever
+  needed — not on the roadmap).
+- Replacing XXH3 as ruvector's primary hash (ADR-058's job).
+- Strong-pseudoprime-based Lucas certificates (yagni for hashing).
+- Distributed prime-generation protocols (we mint locally, deterministically).
+
+---
+
+## 11. Approval Checklist
+
+- [ ] Architecture review (links ADR-151)
+- [ ] Security review (esp. §4.4 witness chain)
+- [ ] Performance baseline captured for shard-router and HNSW p99
+- [ ] WASM size budget verified (`micro-hnsw-wasm` < +2 KB)
+- [ ] Documentation: README in `ruvector-collections` references new module