mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-30 03:53:34 +00:00
feat(collections): PIAL Phase 0 — Miller-Rabin primality kernel + prime tables (#358)
feat(collections): PIAL Phase 0 — Miller-Rabin primality kernel + prime tables
This commit is contained in:
commit
855d8faec4
13 changed files with 2098 additions and 1 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -9124,6 +9124,7 @@ version = "2.2.0"
|
|||
dependencies = [
|
||||
"bincode 2.0.1",
|
||||
"chrono",
|
||||
"criterion 0.5.1",
|
||||
"dashmap 6.1.0",
|
||||
"parking_lot 0.12.5",
|
||||
"ruvector-core 2.2.0",
|
||||
|
|
|
|||
|
|
@ -7,6 +7,13 @@ authors.workspace = true
|
|||
repository.workspace = true
|
||||
readme = "README.md"
|
||||
description = "High-performance collection management for Ruvector vector databases"
|
||||
build = "build.rs"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
# Opt-in probabilistic Miller-Rabin for u128 (PRD §5, ADR-151).
|
||||
# WASM u128 codegen is ~5× slower than native; gate keeps it out of default bundles.
|
||||
unstable-u128 = []
|
||||
|
||||
[dependencies]
|
||||
ruvector-core = { version = "2.0.2", path = "../ruvector-core" }
|
||||
|
|
@ -20,3 +27,9 @@ bincode = { workspace = true }
|
|||
chrono = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5", features = ["html_reports"] }
|
||||
|
||||
[[bench]]
|
||||
name = "primality"
|
||||
harness = false
|
||||
|
||||
|
|
|
|||
57
crates/ruvector-collections/benches/primality.rs
Normal file
57
crates/ruvector-collections/benches/primality.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
//! Phase-0 benches for ADR-151 / PIAL.
|
||||
//!
|
||||
//! Targets (M-series):
|
||||
//!
|
||||
//! | bench | target |
|
||||
//! |------------------------------------------|--------|
|
||||
//! | `is_prime_u64` (worst case) | ≤ 50 ns |
|
||||
//! | `prev_prime_below_pow2` (table fast path)| ≤ 1 ns |
|
||||
//! | `next_prime_u64` (arbitrary) | ≤ 2 µs |
|
||||
//! | `next_prime_u64` (2^61) | ≤ 12 µs |
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use ruvector_collections::primality::{
|
||||
is_prime_u64, next_prime_u64, prev_prime_below_pow2,
|
||||
};
|
||||
|
||||
fn bench_is_prime_u64_worst_case(c: &mut Criterion) {
|
||||
// The Sinclair witness loop runs to completion only on actual primes,
|
||||
// so use the largest u64 prime as worst-case input.
|
||||
let n = u64::MAX - 58;
|
||||
c.bench_function("is_prime_u64/worst_case_largest_u64_prime", |b| {
|
||||
b.iter(|| is_prime_u64(black_box(n)))
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_prev_prime_below_pow2_table(c: &mut Criterion) {
|
||||
c.bench_function("prev_prime_below_pow2/k=32_shard_router", |b| {
|
||||
b.iter(|| prev_prime_below_pow2(black_box(32)))
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_next_prime_u64_arbitrary(c: &mut Criterion) {
|
||||
// Pick a value off the power-of-two grid so the fast path is missed
|
||||
// and the general MR descent is exercised.
|
||||
let n: u64 = 1_000_003_777;
|
||||
c.bench_function("next_prime_u64/arbitrary_~1e9", |b| {
|
||||
b.iter(|| next_prime_u64(black_box(n)))
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_next_prime_u64_2_pow_61(c: &mut Criterion) {
|
||||
// 2^61 hits the table fast path via the power-of-two check; subtract 1
|
||||
// to force the general MR descent against a worst-case-shaped input.
|
||||
let n: u64 = (1u64 << 61) - 1;
|
||||
c.bench_function("next_prime_u64/2^61_minus_1_general_path", |b| {
|
||||
b.iter(|| next_prime_u64(black_box(n)))
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
primality_benches,
|
||||
bench_is_prime_u64_worst_case,
|
||||
bench_prev_prime_below_pow2_table,
|
||||
bench_next_prime_u64_arbitrary,
|
||||
bench_next_prime_u64_2_pow_61
|
||||
);
|
||||
criterion_main!(primality_benches);
|
||||
73
crates/ruvector-collections/build.rs
Normal file
73
crates/ruvector-collections/build.rs
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
// build.rs — emits PRIMES_BELOW_2K[57] and PRIMES_ABOVE_2K[57] using the
|
||||
// same Miller-Rabin kernel that ships at runtime. ADR-151 acceptance #2
|
||||
// requires the table and the runtime to agree on every entry, and this is
|
||||
// how we guarantee that — one source of truth, included from both sides.
|
||||
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
include!("src/primality_kernel.rs");
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=src/primality_kernel.rs");
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
|
||||
let mut out = String::with_capacity(4096);
|
||||
out.push_str(
|
||||
"// AUTO-GENERATED by build.rs from primality_kernel.rs.\n\
|
||||
// Do not edit by hand — regenerated on every build.\n\
|
||||
//\n\
|
||||
// Index: table[k - 8] holds the prime for exponent k, k in [8, 64].\n\n",
|
||||
);
|
||||
|
||||
// BELOW: largest prime strictly less than 2^k.
|
||||
out.push_str(
|
||||
"/// Largest prime strictly less than 2^k for k in [8, 64], indexed by `k - 8`.\n\
|
||||
///\n\
|
||||
/// Generated at build time from the same Miller-Rabin kernel that ships at runtime\n\
|
||||
/// (ADR-151 acceptance #2). Re-validated under `cargo test`.\n",
|
||||
);
|
||||
out.push_str("pub const PRIMES_BELOW_2K: [u64; 57] = [\n");
|
||||
for k in 8u32..=64 {
|
||||
let p = if k == 64 {
|
||||
// 2^64 overflows u64. Largest prime < 2^64 is the largest u64
|
||||
// prime; u64::MAX itself is composite, so prev_prime(u64::MAX)
|
||||
// gives the right answer.
|
||||
mr_prev_prime_u64(u64::MAX)
|
||||
} else {
|
||||
mr_prev_prime_u64(1u64 << k)
|
||||
};
|
||||
out.push_str(&format!(" {p}, // largest prime < 2^{k}\n"));
|
||||
}
|
||||
out.push_str("];\n\n");
|
||||
|
||||
// ABOVE: smallest prime strictly greater than 2^k.
|
||||
out.push_str(
|
||||
"/// Smallest prime strictly greater than 2^k for k in [8, 64], indexed by `k - 8`.\n\
|
||||
///\n\
|
||||
/// Entry at k = 64 is `0` (sentinel) — no u64 prime exists greater than 2^64.\n\
|
||||
/// Runtime callers must avoid that index.\n",
|
||||
);
|
||||
out.push_str("pub const PRIMES_ABOVE_2K: [u64; 57] = [\n");
|
||||
for k in 8u32..=64 {
|
||||
let p = if k == 64 {
|
||||
// No u64 prime exists strictly greater than 2^64. Emit a sentinel
|
||||
// and forbid this index at the runtime call site (debug_assert
|
||||
// in next_prime_above_pow2).
|
||||
0u64
|
||||
} else {
|
||||
mr_next_prime_u64(1u64 << k)
|
||||
};
|
||||
if p == 0 {
|
||||
out.push_str(&format!(" 0, // sentinel: no u64 prime > 2^{k}\n"));
|
||||
} else {
|
||||
out.push_str(&format!(" {p}, // smallest prime > 2^{k}\n"));
|
||||
}
|
||||
}
|
||||
out.push_str("];\n");
|
||||
|
||||
let out_dir = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR not set"));
|
||||
let out_path = out_dir.join("prime_tables.rs");
|
||||
fs::write(&out_path, out).expect("failed to write prime_tables.rs");
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
//! # Ruvector Collections
|
||||
//!
|
||||
//! Multi-collection management with aliases for organizing vector databases.
|
||||
//! Multi-collection management with aliases for organizing vector databases,
|
||||
//! plus the workspace's shared primality utility (ADR-151 / PIAL).
|
||||
//!
|
||||
//! ## Features
|
||||
//!
|
||||
|
|
@ -9,6 +10,9 @@
|
|||
//! - **Collection Statistics**: Track collection metrics
|
||||
//! - **Thread-safe**: Concurrent access using DashMap
|
||||
//! - **Persistence**: Store collections on disk
|
||||
//! - **Primality**: Deterministic Miller-Rabin + tabled fast paths for prime
|
||||
//! moduli used by ruvector-graph, micro-hnsw-wasm, sparsifier, attn-mincut,
|
||||
//! and pi-brain (see [`primality`])
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
|
|
@ -47,6 +51,7 @@
|
|||
pub mod collection;
|
||||
pub mod error;
|
||||
pub mod manager;
|
||||
pub mod primality;
|
||||
|
||||
pub use collection::{Collection, CollectionConfig, CollectionStats};
|
||||
pub use error::{CollectionError, Result};
|
||||
|
|
|
|||
316
crates/ruvector-collections/src/primality.rs
Normal file
316
crates/ruvector-collections/src/primality.rs
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
//! Deterministic Miller-Rabin primality plus tabled fast paths for the
|
||||
//! power-of-two-aligned cases that dominate ruvector's hot paths.
|
||||
//!
|
||||
//! Designed for ADR-151 (PIAL — Prime-Indexed Acceleration Layer). Five
|
||||
//! consumers (shard router, HNSW buckets, sparsifier strides, mincut LSH,
|
||||
//! pi-brain witness chain) get one shared utility and zero new external
|
||||
//! dependencies.
|
||||
//!
|
||||
//! # Determinism
|
||||
//!
|
||||
//! | Range | Witnesses | Result |
|
||||
//! |-------|-----------|--------|
|
||||
//! | `n < 2^32` | `{2, 7, 61}` (Pomerance/Selfridge/Wagstaff) | Deterministic |
|
||||
//! | `n < 2^64` | `{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` (Sinclair, 2011) | Deterministic |
|
||||
//! | `n < 2^128` | 40 random rounds (`unstable-u128` feature) | `Pr[err] < 2⁻⁸⁰` |
|
||||
//!
|
||||
//! Pinned-pseudoprime regressions in `tests/primality_pseudoprimes.rs`
|
||||
//! protect the deterministic ranges from witness-set "optimizations".
|
||||
//!
|
||||
//! # Hot vs cold paths
|
||||
//!
|
||||
//! Three of PIAL's five sites request primes near *fixed* power-of-two
|
||||
//! sizes. Those calls hit [`prev_prime_below_pow2`] / [`next_prime_above_pow2`]
|
||||
//! — a single L1-cached load, ~1 ns. The two unpredictable sites (LSH
|
||||
//! universe, witness ephemeral primes) use the general MR descent at
|
||||
//! ~250 ns. Both are cold.
|
||||
//!
|
||||
//! Crucially the table is generated at build time from this very module's
|
||||
//! [`is_prime_u64`], so MR remains the source of truth.
|
||||
|
||||
// Pull in the deterministic Miller-Rabin kernel that build.rs also uses.
|
||||
// Same code, same answers — that's the whole point.
|
||||
include!("primality_kernel.rs");
|
||||
|
||||
// Pull in the build-time-generated tables (PRIMES_BELOW_2K, PRIMES_ABOVE_2K).
|
||||
include!(concat!(env!("OUT_DIR"), "/prime_tables.rs"));
|
||||
|
||||
/// Returns `true` iff `n` is prime. Deterministic for all `u32`.
|
||||
///
|
||||
/// Uses the Pomerance/Selfridge/Wagstaff witness set `{2, 7, 61}` via the
|
||||
/// shared u64 path.
|
||||
#[inline]
|
||||
pub fn is_prime_u32(n: u32) -> bool {
|
||||
mr_is_prime_u32(n)
|
||||
}
|
||||
|
||||
/// Returns `true` iff `n` is prime. Deterministic for all `u64`.
|
||||
///
|
||||
/// Uses Sinclair's 2011 witness set
|
||||
/// `{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` — known to be sufficient
|
||||
/// for the entire `u64` range. Allocation-free.
|
||||
#[inline]
|
||||
pub fn is_prime_u64(n: u64) -> bool {
|
||||
mr_is_prime_u64(n)
|
||||
}
|
||||
|
||||
/// Largest prime strictly less than `2^k`, for `k ∈ [8, 64]`.
|
||||
///
|
||||
/// Single L1-cached table load (~1 ns). Use this whenever the caller knows
|
||||
/// the size is a power of two — shard routers, HNSW bucket sizing,
|
||||
/// sparsifier strides.
|
||||
///
|
||||
/// # Panics (debug only)
|
||||
///
|
||||
/// Debug-asserts `8 <= k <= 64`.
|
||||
#[inline]
|
||||
pub fn prev_prime_below_pow2(k: u32) -> u64 {
|
||||
debug_assert!((8..=64).contains(&k), "k out of table range [8, 64]");
|
||||
PRIMES_BELOW_2K[(k - 8) as usize]
|
||||
}
|
||||
|
||||
/// Smallest prime strictly greater than `2^k`, for `k ∈ [8, 63]`.
|
||||
///
|
||||
/// Symmetric companion to [`prev_prime_below_pow2`]. The `k = 64` entry of
|
||||
/// the underlying table is a sentinel (no `u64` prime exists greater than
|
||||
/// `2^64`); callers must not request it.
|
||||
///
|
||||
/// # Panics (debug only)
|
||||
///
|
||||
/// Debug-asserts `8 <= k <= 63`.
|
||||
#[inline]
|
||||
pub fn next_prime_above_pow2(k: u32) -> u64 {
|
||||
debug_assert!(
|
||||
(8..=63).contains(&k),
|
||||
"k out of table range [8, 63]; PRIMES_ABOVE_2K[64] is a sentinel"
|
||||
);
|
||||
PRIMES_ABOVE_2K[(k - 8) as usize]
|
||||
}
|
||||
|
||||
/// Largest prime strictly less than `n`. Returns `0` if no such `u64` prime
|
||||
/// exists (i.e. `n <= 2`).
|
||||
///
|
||||
/// Routes power-of-two-aligned inputs (`n = 2^k`, `k ∈ [8, 64]`) to the
|
||||
/// table; everything else falls through to a Miller-Rabin descent.
|
||||
#[inline]
|
||||
pub fn prev_prime_u64(n: u64) -> u64 {
|
||||
if n.is_power_of_two() {
|
||||
let k = n.trailing_zeros();
|
||||
if (8..=64).contains(&k) {
|
||||
return PRIMES_BELOW_2K[(k - 8) as usize];
|
||||
}
|
||||
}
|
||||
mr_prev_prime_u64(n)
|
||||
}
|
||||
|
||||
/// Smallest prime strictly greater than `n`. Returns `0` if `n` is at or
|
||||
/// above the largest `u64` prime (`u64::MAX - 58`).
|
||||
///
|
||||
/// Routes power-of-two-aligned inputs (`n = 2^k`, `k ∈ [8, 63]`) to the
|
||||
/// table; everything else falls through to a Miller-Rabin descent.
|
||||
#[inline]
|
||||
pub fn next_prime_u64(n: u64) -> u64 {
|
||||
if n.is_power_of_two() {
|
||||
let k = n.trailing_zeros();
|
||||
if (8..=63).contains(&k) {
|
||||
return PRIMES_ABOVE_2K[(k - 8) as usize];
|
||||
}
|
||||
}
|
||||
mr_next_prime_u64(n)
|
||||
}
|
||||
|
||||
/// Derives a deterministic ephemeral prime from `seed`, suitable for the
|
||||
/// pi-brain witness chain (ADR-151 §4.4).
|
||||
///
|
||||
/// Maps the seed into the odd lower-2⁶¹ window then walks up to the next
|
||||
/// prime. The 2⁶¹ ceiling keeps results well inside `u64` even after the
|
||||
/// MR walk and lets downstream consumers store the value in a single
|
||||
/// 64-bit field with room to spare.
|
||||
#[inline]
|
||||
pub fn ephemeral_prime(seed: u64) -> u64 {
|
||||
let mask = (1u64 << 61) - 1;
|
||||
let s = (seed | 1) & mask;
|
||||
if mr_is_prime_u64(s) {
|
||||
s
|
||||
} else {
|
||||
// Bounded: the prime gap below 2^61 is far smaller than the
|
||||
// remaining headroom to u64::MAX, so this never returns 0.
|
||||
mr_next_prime_u64(s)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Probabilistic u128 mode (opt-in) ─────────────────────────────────────
|
||||
|
||||
/// Probabilistic Miller-Rabin for `u128`. Soundness error `< 4^-rounds`;
/// `rounds = 40` gives `< 2⁻⁸⁰`, adequate for hashing but **not** a
/// cryptographic prime generator (see ADR-151 "Security Considerations").
///
/// Inputs that fit in a `u64` are answered exactly by the deterministic
/// kernel and ignore `rounds`. For larger inputs the witnesses come from a
/// small LCG seeded from `n`, so repeated calls with the same arguments give
/// the same answer. With `rounds = 0`, any larger input that survives the
/// small-prime screen is reported prime.
///
/// Gated behind the `unstable-u128` feature: WASM `u128` codegen is ~5×
/// slower than native and we keep it out of default bundles.
#[cfg(feature = "unstable-u128")]
pub fn is_prime_u128(n: u128, rounds: u8) -> bool {
    const SMALL_PRIMES: [u128; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];

    if n < 2 {
        return false;
    }
    // Small-prime screen: n is prime exactly when the first listed prime
    // that divides it is n itself.
    if let Some(&p) = SMALL_PRIMES.iter().find(|&&p| n.is_multiple_of(p)) {
        return n == p;
    }
    // Anything that fits in u64 gets the exact deterministic answer.
    if n <= u64::MAX as u128 {
        return mr_is_prime_u64(n as u64);
    }

    // From here n > u64::MAX and n is odd. Write n - 1 = d * 2^s with d odd.
    let n_minus_1 = n - 1;
    let s = n_minus_1.trailing_zeros();
    let d = n_minus_1 >> s;

    // Inline LCG seeded from n, so results are reproducible across runs.
    // It only needs to spread the witnesses around; it is not a CSPRNG.
    let mut lcg: u128 = n ^ 0x9E37_79B9_7F4A_7C15_F39C_C060_5CED_C835u128;
    let witness_span = n - 3;
    let found_witness = (0..rounds).any(|_| {
        lcg = lcg
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        // Base drawn from [2, n - 2].
        let a = 2u128 + (lcg % witness_span);
        mr_is_composite_u128(n, d, s, a)
    });
    !found_witness
}
|
||||
|
||||
/// Returns `true` iff base `a` proves `n` composite in one Miller-Rabin
/// round, where `n - 1 = d * 2^s` has been decomposed by the caller.
#[cfg(feature = "unstable-u128")]
#[inline]
fn mr_is_composite_u128(n: u128, d: u128, s: u32, a: u128) -> bool {
    let mut x = powmod_u128(a, d, n);
    let minus_one = n - 1;
    if x == 1 || x == minus_one {
        return false;
    }
    // Up to s - 1 further squarings; reaching -1 mod n clears the suspicion.
    for _ in 1..s {
        x = mulmod_u128(x, x, n);
        if x == minus_one {
            return false;
        }
    }
    true
}
|
||||
|
||||
/// Computes `base^exp mod m` by square-and-multiply, using the
/// overflow-free `mulmod_u128` for every product. Returns `0` when `m == 1`.
#[cfg(feature = "unstable-u128")]
#[inline]
fn powmod_u128(mut base: u128, mut exp: u128, m: u128) -> u128 {
    if m == 1 {
        return 0;
    }
    let mut result: u128 = 1 % m;
    base %= m;
    while exp != 0 {
        if exp & 1 != 0 {
            result = mulmod_u128(result, base, m);
        }
        exp >>= 1;
        // Skip the final squaring: its result would never be used.
        if exp == 0 {
            break;
        }
        base = mulmod_u128(base, base, m);
    }
    result
}
|
||||
|
||||
// Double-and-add ("Russian peasant") modular multiply for u128: computes
// a * b mod m for any m < 2^128 without needing a 256-bit intermediate.
#[cfg(feature = "unstable-u128")]
#[inline]
fn mulmod_u128(mut a: u128, b: u128, m: u128) -> u128 {
    let mut total: u128 = 0;
    a %= m;
    // One step per significant bit of b, lowest bit first.
    let significant_bits = u128::BITS - b.leading_zeros();
    for bit in 0..significant_bits {
        if (b >> bit) & 1 != 0 {
            total = mod_add_u128(total, a, m);
        }
        a = mod_add_u128(a, a, m);
    }
    total
}
|
||||
|
||||
/// Computes `(a + b) mod m` for `a < m` and `b < m`, where `m` may exceed
/// `2^127`.
///
/// The true sum can overflow `u128`. When the addition carries, or the sum
/// is already `>= m`, a single wrapping subtraction of `m` gives the reduced
/// value.
#[cfg(feature = "unstable-u128")]
#[inline]
fn mod_add_u128(a: u128, b: u128, m: u128) -> u128 {
    let (sum, carried) = a.overflowing_add(b);
    if carried || sum >= m {
        sum.wrapping_sub(m)
    } else {
        sum
    }
}
|
||||
|
||||
// ── Internal sanity tests (run with the rest of the crate's unit tests) ──
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_prime_u64` matches the known list of primes below 100.
    #[test]
    fn small_primes_under_100() {
        let known: [u64; 25] = [
            2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79,
            83, 89, 97,
        ];
        for n in 0u64..100 {
            assert_eq!(is_prime_u64(n), known.contains(&n), "is_prime_u64({n})");
        }
    }

    /// Boundary inputs: 0, 1, and both ends of the `u64` range.
    #[test]
    fn edges() {
        assert!(!is_prime_u64(0));
        assert!(!is_prime_u64(1));
        assert!(!is_prime_u64(u64::MAX));
        assert!(is_prime_u64(u64::MAX - 58), "largest u64 prime");
    }

    /// Spot-checks the first, last and most-used `PRIMES_BELOW_2K` entries.
    #[test]
    fn table_index_round_trip() {
        // The most heavily-used shard-router entry.
        assert_eq!(prev_prime_below_pow2(32), 4_294_967_291);
        // Smallest table entry.
        assert_eq!(prev_prime_below_pow2(8), 251);
        // Largest table entry.
        assert_eq!(prev_prime_below_pow2(64), u64::MAX - 58);
    }

    /// Exercises both halves of `is_prime_u128`: the deterministic `u64`
    /// hand-off and the probabilistic path above `u64::MAX`.
    #[cfg(feature = "unstable-u128")]
    #[test]
    fn u128_probabilistic_smoke() {
        use super::is_prime_u128;
        // Defers to deterministic u64 path for n <= u64::MAX.
        assert!(is_prime_u128(7, 40));
        assert!(!is_prime_u128(9, 40));
        assert!(is_prime_u128(u64::MAX as u128 - 58, 40));
        // True u128 path: 2^89 - 1 is a Mersenne prime.
        let m89: u128 = (1u128 << 89) - 1;
        assert!(is_prime_u128(m89, 40), "M_89 = 2^89 - 1 is prime");
        // Composite just above 2^64. 2^65 + 1 is divisible by 3 and 11, so
        // the small-prime screen rejects it before any random round runs.
        let composite: u128 = (1u128 << 65) + 1;
        assert!(!is_prime_u128(composite, 40));
        // Composite that survives the screen: the product of the Mersenne
        // primes 2^61 - 1 and 2^31 - 1 has no factor <= 37 and exceeds
        // u64::MAX, so only the probabilistic rounds can reject it.
        let semiprime: u128 = ((1u128 << 61) - 1) * ((1u128 << 31) - 1);
        assert!(!is_prime_u128(semiprime, 40), "M_61 * M_31 is composite");
    }

    /// `ephemeral_prime` always returns a prime, and stays near its window.
    #[test]
    fn ephemeral_prime_is_prime_for_assorted_seeds() {
        for seed in [0u64, 1, 42, 0xDEAD_BEEF, u64::MAX, 1_000_003] {
            let p = ephemeral_prime(seed);
            assert!(is_prime_u64(p), "ephemeral_prime({seed}) = {p} not prime");
            // Loose upper bound: largest known prime gap below 2^64 is well under 2^31,
            // so anything below 2^62 means the walk stayed near its 2^61 starting window.
            assert!(p < (1u64 << 62), "ephemeral_prime overshot expected window");
        }
    }
}
|
||||
162
crates/ruvector-collections/src/primality_kernel.rs
Normal file
162
crates/ruvector-collections/src/primality_kernel.rs
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
// Deterministic Miller-Rabin kernel — ADR-151 (PIAL).
|
||||
//
|
||||
// `include!`d into two contexts (build.rs and src/primality.rs) which use
|
||||
// different subsets of the symbols. Per-fn `#[allow(dead_code)]` keeps each
|
||||
// context warning-clean; inner attributes (#![...]) aren't legal in
|
||||
// included files.
|
||||
//
|
||||
// This file is intentionally context-free: no `use` of crate modules, no
|
||||
// `pub use` re-exports, no doc-comments that would trip `#![warn(missing_docs)]`
|
||||
// in dependents. It is `include!`d from BOTH `src/primality.rs` AND `build.rs`
|
||||
// so the table generator and the runtime share one source of truth.
|
||||
//
|
||||
// Witness sets:
|
||||
//   u32: {2, 7, 61}                                      Jaeschke (1993); exact for n < 4,759,123,141
|
||||
//   u64: {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}   first twelve primes; exact for n < 2^64 (Sorenson & Webster, 2015)
|
||||
//
|
||||
// Both are deterministic over their full ranges. Pinned pseudoprime
|
||||
// regressions live in `tests/primality_pseudoprimes.rs`.
|
||||
|
||||
// Computes a * b mod m. The product is formed in u128, where two u64
// factors can never overflow, then reduced and narrowed back to u64.
// Panics if m == 0 (remainder by zero).
#[inline]
#[allow(dead_code)]
fn mr_mulmod_u64(a: u64, b: u64, m: u64) -> u64 {
    let wide = u128::from(a) * u128::from(b);
    (wide % u128::from(m)) as u64
}
|
||||
|
||||
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_powmod_u64(mut base: u64, mut exp: u64, m: u64) -> u64 {
|
||||
if m == 1 {
|
||||
return 0;
|
||||
}
|
||||
let mut acc: u64 = 1;
|
||||
base %= m;
|
||||
while exp > 0 {
|
||||
if exp & 1 == 1 {
|
||||
acc = mr_mulmod_u64(acc, base, m);
|
||||
}
|
||||
exp >>= 1;
|
||||
if exp > 0 {
|
||||
base = mr_mulmod_u64(base, base, m);
|
||||
}
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
// Returns true iff `a` is a Miller-Rabin witness of compositeness for `n`.
|
||||
// Caller guarantees: n is odd, n > 3, and a in [2, n-2]. n - 1 = d * 2^s
|
||||
// with d odd (passed in pre-decomposed for speed).
|
||||
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_is_composite_witness(n: u64, d: u64, s: u32, a: u64) -> bool {
|
||||
let mut x = mr_powmod_u64(a, d, n);
|
||||
if x == 1 || x == n - 1 {
|
||||
return false;
|
||||
}
|
||||
for _ in 0..s.saturating_sub(1) {
|
||||
x = mr_mulmod_u64(x, x, n);
|
||||
if x == n - 1 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_is_prime_u64(n: u64) -> bool {
|
||||
// Small-n fast path covers all of the ill-defined / edge cases the
|
||||
// Sinclair set assumes away (n < 9, even n, n ≤ largest witness).
|
||||
if n < 2 {
|
||||
return false;
|
||||
}
|
||||
// Cheap divisibility screen by the first few primes.
|
||||
const SMALL_PRIMES: [u64; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
|
||||
for &p in &SMALL_PRIMES {
|
||||
if n == p {
|
||||
return true;
|
||||
}
|
||||
if n.is_multiple_of(p) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// n is now odd, > 37, and coprime to every Sinclair witness — so
|
||||
// every witness is a valid base in [2, n-2].
|
||||
let mut d = n - 1;
|
||||
let mut s: u32 = 0;
|
||||
while d & 1 == 0 {
|
||||
d >>= 1;
|
||||
s += 1;
|
||||
}
|
||||
for &a in &SMALL_PRIMES {
|
||||
if mr_is_composite_witness(n, d, s, a) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
// Exact primality test for u32 using the three-base witness set
// {2, 7, 61}, which admits no strong pseudoprime below 4,759,123,141 and
// therefore covers every u32.
//
// Kept self-contained on purpose: every operand is below 2^32, so each
// product fits in a u64 and no u128 arithmetic is needed.
#[inline]
#[allow(dead_code)]
fn mr_is_prime_u32(n: u32) -> bool {
    const SCREEN: [u64; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
    const BASES: [u64; 3] = [2, 7, 61];

    let n = u64::from(n);
    if n < 2 {
        return false;
    }
    // Small-prime screen: n is prime exactly when the first screen prime
    // that divides it is n itself.
    for &p in &SCREEN {
        let rem = n % p;
        if rem == 0 {
            return n == p;
        }
    }

    // n is odd and >= 41. Write n - 1 = d * 2^s with d odd.
    let s = (n - 1).trailing_zeros();
    let d = (n - 1) >> s;

    'bases: for &base in &BASES {
        let a = base % n;
        if a == 0 {
            // Only n == 61 reduces a base to zero; the other bases decide it.
            continue;
        }
        // x = a^d mod n by square-and-multiply.
        let mut x: u64 = 1;
        let mut square = a;
        let mut exp = d;
        while exp != 0 {
            if exp & 1 != 0 {
                x = x * square % n;
            }
            square = square * square % n;
            exp >>= 1;
        }
        if x == 1 || x == n - 1 {
            continue;
        }
        for _ in 1..s {
            x = x * x % n;
            if x == n - 1 {
                continue 'bases;
            }
        }
        // This base is a witness: n is composite.
        return false;
    }
    true
}
|
||||
|
||||
// Find the largest prime strictly less than `upper`. Returns 0 if none
|
||||
// exists in u64 (i.e. upper <= 2). Used by build.rs and the general
|
||||
// `prev_prime_u64` runtime path.
|
||||
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_prev_prime_u64(upper: u64) -> u64 {
|
||||
if upper <= 2 {
|
||||
return 0;
|
||||
}
|
||||
if upper == 3 {
|
||||
return 2;
|
||||
}
|
||||
// Walk downward through odd candidates.
|
||||
let mut n = upper - 1;
|
||||
if n & 1 == 0 {
|
||||
n -= 1;
|
||||
}
|
||||
loop {
|
||||
if mr_is_prime_u64(n) {
|
||||
return n;
|
||||
}
|
||||
if n <= 3 {
|
||||
return 2;
|
||||
}
|
||||
n -= 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Find the smallest prime strictly greater than `lower`. Returns 0 if
|
||||
// `lower` >= largest u64 prime (u64::MAX - 58).
|
||||
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_next_prime_u64(lower: u64) -> u64 {
|
||||
if lower < 2 {
|
||||
return 2;
|
||||
}
|
||||
if lower < 3 {
|
||||
return 3;
|
||||
}
|
||||
let largest_u64_prime: u64 = u64::MAX - 58;
|
||||
if lower >= largest_u64_prime {
|
||||
return 0;
|
||||
}
|
||||
let mut n = lower + 1;
|
||||
if n & 1 == 0 {
|
||||
n += 1;
|
||||
}
|
||||
loop {
|
||||
if mr_is_prime_u64(n) {
|
||||
return n;
|
||||
}
|
||||
// Bounded: we proved above that some prime exists in (lower, u64::MAX].
|
||||
n += 2;
|
||||
}
|
||||
}
|
||||
84
crates/ruvector-collections/tests/primality_pseudoprimes.rs
Normal file
84
crates/ruvector-collections/tests/primality_pseudoprimes.rs
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
//! Pinned pseudoprime regressions for the deterministic Miller-Rabin path.
|
||||
//!
|
||||
//! These exist so any future "optimization" that shrinks the Sinclair-12
|
||||
//! witness set fails CI immediately. Numbers come from OEIS A014233
|
||||
//! (smallest strong pseudoprimes to the first n primes).
|
||||
|
||||
use ruvector_collections::primality::{is_prime_u32, is_prime_u64};
|
||||
|
||||
/// OEIS A014233(4): smallest spsp to bases {2, 3, 5, 7}. Detected by base 11.
|
||||
const SPP_2357: u64 = 3_215_031_751;
|
||||
|
||||
/// OEIS A014233(5): smallest spsp to bases {2, 3, 5, 7, 11}. Detected by base 13.
|
||||
const SPP_235711: u64 = 2_152_302_898_747;
|
||||
|
||||
/// OEIS A014233(11): smallest spsp to first 11 primes (through 31).
|
||||
/// Detected ONLY by the 12th Sinclair witness, base 37 — the canary that
|
||||
/// catches anyone shrinking the witness set.
|
||||
const SPP_FIRST_11: u64 = 3_825_123_056_546_413_051;
|
||||
|
||||
/// The smallest strong pseudoprime to bases 2, 3, 5, 7 is reported composite.
#[test]
fn detects_strong_pseudoprime_2357() {
    let reported_prime = is_prime_u64(SPP_2357);
    assert!(!reported_prime, "{SPP_2357} is composite (detected by base 11)");
}
|
||||
|
||||
/// The smallest strong pseudoprime to bases 2 through 11 is reported
/// composite.
#[test]
fn detects_strong_pseudoprime_235711() {
    let reported_prime = is_prime_u64(SPP_235711);
    assert!(!reported_prime, "{SPP_235711} is composite (detected by base 13)");
}
|
||||
|
||||
/// The canary for the witness set: this composite fools the first eleven
/// prime bases, so dropping base 37 makes this test fail.
#[test]
fn detects_strong_pseudoprime_first_11_primes() {
    let reported_prime = is_prime_u64(SPP_FIRST_11);
    assert!(
        !reported_prime,
        "{SPP_FIRST_11} is composite — detection requires base 37 (Sinclair's last witness)"
    );
}
|
||||
|
||||
/// `is_prime_u64` agrees with the known primes for every `n` in `0..=100`.
#[test]
fn small_prime_sanity_under_100() {
    const PRIMES_UNDER_100: [u64; 25] = [
        2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
        89, 97,
    ];
    (0u64..=100).for_each(|n| {
        let expected = PRIMES_UNDER_100.contains(&n);
        assert_eq!(is_prime_u64(n), expected, "is_prime_u64({n})");
    });
}
|
||||
|
||||
/// Boundary values at both ends of the `u64` and `u32` ranges.
#[test]
fn edge_cases() {
    // Neither 0 nor 1 is prime.
    for n in [0u64, 1] {
        assert!(!is_prime_u64(n));
    }

    let top = u64::MAX;
    assert!(!is_prime_u64(top), "u64::MAX (= 2^64 - 1) factors");
    assert!(is_prime_u64(top - 58), "largest u64 prime: u64::MAX - 58");

    // Largest u32 prime is 2^32 - 5 = 4_294_967_291.
    assert!(is_prime_u32(4_294_967_291), "largest u32 prime");
    assert!(!is_prime_u32(u32::MAX));
}
|
||||
|
||||
/// A handful of Mersenne primes inside `u64` must all test prime.
#[test]
fn assorted_known_primes() {
    const KNOWN_PRIMES: [u64; 7] = [
        7,
        127,
        8191,
        131_071,
        524_287,
        2_147_483_647,             // 2^31 - 1
        2_305_843_009_213_693_951, // 2^61 - 1
    ];
    for p in KNOWN_PRIMES.iter().copied() {
        assert!(is_prime_u64(p), "{p} is a known prime");
    }
}
|
||||
|
||||
/// Carmichael numbers pass textbook Fermat tests for every coprime base;
/// pinned here so they are always reported composite.
#[test]
fn assorted_known_composites() {
    const CARMICHAEL: [u64; 7] = [561, 1105, 1729, 2465, 2821, 6601, 8911];
    for n in CARMICHAEL.iter().copied() {
        assert!(!is_prime_u64(n), "{n} is a Carmichael number, composite");
    }
}
|
||||
99
crates/ruvector-collections/tests/table_cross_check.rs
Normal file
99
crates/ruvector-collections/tests/table_cross_check.rs
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
//! Acceptance criterion #2 of ADR-151: every entry of `PRIMES_BELOW_2K` and
|
||||
//! `PRIMES_ABOVE_2K` must agree with the runtime Miller-Rabin descent.
|
||||
//!
|
||||
//! For each `k ∈ [8, 64]` (BELOW) / `[8, 63]` (ABOVE) we re-run MR on the
|
||||
//! tabled prime, then sweep every odd integer in the gap to `2^k` and
|
||||
//! assert no other prime hides there. This is what makes MR — not the
|
||||
//! table — the source of truth.
|
||||
|
||||
use ruvector_collections::primality::{
|
||||
is_prime_u64, PRIMES_ABOVE_2K, PRIMES_BELOW_2K,
|
||||
};
|
||||
|
||||
/// Calls `f` on every odd integer `n` with `lo < n < hi`, in ascending
/// order.
///
/// All stepping uses checked addition, so bounds at or near `u64::MAX` end
/// the sweep instead of overflowing. The cross-check tests use it to confirm
/// that the gap next to a tabled prime contains no other prime.
fn sweep_odds_strictly_between<F: FnMut(u64)>(lo: u64, hi: u64, f: F) {
    // First odd value strictly above `lo`, if one is representable.
    let first_odd = lo.checked_add(1).and_then(|n| {
        if n & 1 == 0 {
            n.checked_add(1)
        } else {
            Some(n)
        }
    });
    std::iter::successors(first_odd, |&n| n.checked_add(2))
        .take_while(|&n| n < hi)
        .for_each(f);
}
|
||||
|
||||
/// Every `PRIMES_BELOW_2K` entry is prime, and no prime lies between it
/// and `2^k`.
#[test]
fn primality_below_table_cross_check() {
    for k in 8u32..=64 {
        let p = PRIMES_BELOW_2K[(k - 8) as usize];
        assert!(
            is_prime_u64(p),
            "PRIMES_BELOW_2K[k={k}] = {p} not prime per Miller-Rabin"
        );

        match 1u64.checked_shl(k) {
            // 2^k fits in u64: sweep the open interval (p, 2^k).
            Some(hi) => sweep_odds_strictly_between(p, hi, |m| {
                assert!(
                    !is_prime_u64(m),
                    "found prime {m} in (PRIMES_BELOW_2K[k={k}] = {p}, 2^{k} = {hi})"
                );
            }),
            // k = 64: 2^64 is not representable. Sweep up to u64::MAX as an
            // exclusive bound, then check u64::MAX itself separately.
            None => {
                sweep_odds_strictly_between(p, u64::MAX, |m| {
                    assert!(
                        !is_prime_u64(m),
                        "found prime {m} > PRIMES_BELOW_2K[64] = {p} (within u64)"
                    );
                });
                assert!(!is_prime_u64(u64::MAX), "u64::MAX is composite");
            }
        }
    }
}
|
||||
|
||||
/// Every `PRIMES_ABOVE_2K` entry for `k` in `[8, 63]` is prime with no
/// prime between `2^k` and it, and the `k = 64` slot is still the
/// sentinel `0`.
#[test]
fn primality_above_table_cross_check() {
    // Zipping with 8..=63 visits table slots 0..=55 and leaves out the
    // k = 64 sentinel at slot 56.
    for (k, &p) in (8u32..=63).zip(PRIMES_ABOVE_2K.iter()) {
        assert!(
            is_prime_u64(p),
            "PRIMES_ABOVE_2K[k={k}] = {p} not prime per Miller-Rabin"
        );
        let lo = 1u64 << k;
        sweep_odds_strictly_between(lo, p, |m| {
            assert!(
                !is_prime_u64(m),
                "found prime {m} in (2^{k} = {lo}, PRIMES_ABOVE_2K[k={k}] = {p})"
            );
        });
    }

    // Sentinel check: a non-zero value here would claim a u64 prime above
    // 2^64, which cannot exist.
    let sentinel = PRIMES_ABOVE_2K[(64 - 8) as usize];
    assert_eq!(
        sentinel, 0,
        "PRIMES_ABOVE_2K[64] must be the sentinel 0 — there is no u64 prime > 2^64"
    );
}
|
||||
381
docs/adr/ADR-151-miller-rabin-prime-optimizations.md
Normal file
381
docs/adr/ADR-151-miller-rabin-prime-optimizations.md
Normal file
|
|
@ -0,0 +1,381 @@
|
|||
# ADR-151: Miller-Rabin–Driven Prime Optimizations (PIAL)
|
||||
|
||||
## Status
|
||||
|
||||
Accepted (Phase 0 landed 2026-04-16; performance targets revised — see "Phase 0 Findings" below)
|
||||
|
||||
## Date
|
||||
|
||||
2026-04-16
|
||||
|
||||
## Authors
|
||||
|
||||
ruv.io · RuVector Architecture
|
||||
|
||||
## Relates To
|
||||
|
||||
- **PRD**: `docs/research/miller-rabin-optimizations/PRD.md`
|
||||
- ADR-027 — HNSW parameterized query fix
|
||||
- ADR-038 — npx-ruvector / RVLite witness integration
|
||||
- ADR-058 — RVF hash security & optimization (finding #6)
|
||||
- ADR-148 — Brain hypothesis engine
|
||||
- ADR-149 — Brain performance optimizations
|
||||
- ADR-150 — π-brain + RuvLtra via Tailscale
|
||||
|
||||
## Tier (per ADR-026)
|
||||
|
||||
- **Core utility**: Tier-1 (Agent Booster eligible — pure WASM transform)
|
||||
- **Integration patches**: Tier-2 (Haiku-cost simple edits)
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
Five independent subsystems in ruvector default to **power-of-two moduli** for
|
||||
hashing, sharding, sketching, and adjacency storage. Each has a documented or
|
||||
empirically observed pathology:
|
||||
|
||||
1. **ruvector-graph shard router** (ADR-058 finding #6, P3): `xxh3_64() mod
|
||||
2^k` produces ~50% birthday collisions at 2³² nodes and biases under
|
||||
Zipfian keys.
|
||||
2. **micro-hnsw-wasm / hyperbolic-hnsw adjacency**: open-addressed tables
|
||||
sized to `2^k` cluster on near-duplicate vectors (timestamps, sensor
|
||||
streams), inflating p99 insert latency.
|
||||
3. **ruvector-sparsifier stride sampler**: power-of-two strides alias on
|
||||
grid-structured graphs (images, meshes, lattices) — well-known LCG-era
|
||||
problem with a well-known fix.
|
||||
4. **ruvector-attn-mincut LSH families**: `((a·x+b) mod p) mod m` requires
|
||||
`p` to be prime and `> universe`; today's hand-picked Mersenne constants
|
||||
silently degrade past their bounds.
|
||||
5. **pi-brain witness chain** (ADR-038): single-hash (XXH3) tamper-evidence
|
||||
with no per-share entropy.
|
||||
|
||||
A grep across all crates confirms **zero existing primality-testing code** in
|
||||
ruvector. The `prime-radiant` crate's name is metaphorical (coherence-gate)
|
||||
and unrelated. There is no infrastructure to build on, but the surface area
|
||||
is small enough that a single utility module unlocks all five consumers.
|
||||
|
||||
We need a primality test that is:
|
||||
|
||||
- **Deterministic** for `u64` (the size used by every consumer above).
|
||||
- **Allocation-free** (hot paths in `no_std` and WASM contexts).
|
||||
- **Constant-time-ish** for cryptographic-flavored use (witness chain).
|
||||
- **Cheap enough** to call mid-resharding without operator coordination.
|
||||
|
||||
**Miller-Rabin** with the first twelve primes as the witness set (called "Sinclair-12" in this project; proven sufficient by Sorenson & Webster, 2015)
|
||||
`{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` satisfies all of these for
|
||||
`u64`. For `u32`, the Pomerance/Selfridge/Wagstaff set `{2, 7, 61}` is
|
||||
sufficient. For `u128` (an opt-in mode for future BFV-flavored work),
|
||||
probabilistic Miller-Rabin with `k = 40` rounds gives a soundness error of
|
||||
`< 2^-80` — adequate for hashing and far below cryptographic thresholds.
|
||||
|
||||
## Decision
|
||||
|
||||
We will introduce a single new module — `crates/ruvector-collections/src/primality.rs` —
|
||||
exposing a deterministic Miller-Rabin primality test plus `next_prime` /
|
||||
`prev_prime` helpers, and we will wire it into five consumer subsystems
|
||||
**incrementally, behind feature flags**, in the order described in the PRD's
|
||||
Rollout Plan.
|
||||
|
||||
We deliberately reject every alternative that fragments the workspace
|
||||
further (new crate, external dependency on `glass_pumpkin` / `num-prime`,
|
||||
or duplicating logic across `micro-hnsw-wasm` and `ruvector-graph`).
|
||||
|
||||
### Architecture Summary
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────────────────────────────┐
|
||||
│ ruvector-collections::primality (NEW, ~250 LoC, no_std) │
|
||||
│ │
|
||||
│ is_prime_u32 / is_prime_u64 / is_prime_u128 │
|
||||
│ next_prime_u64 / prev_prime_u64 │
|
||||
│ ephemeral_prime(seed) ← π-brain witness only │
|
||||
└────────┬──────────────┬──────────────┬──────────────┬─────────┘
|
||||
▼ ▼ ▼ ▼
|
||||
shard router HNSW buckets LSH families witness chain
|
||||
(P1) (P2) (P3, P4, P5) (P6, opt-in)
|
||||
```
|
||||
|
||||
### What We Already Have
|
||||
|
||||
| Component | Location | Status |
|
||||
|-------------------------------------|---------------------------------------------|---------------|
|
||||
| Workspace utility crate | `crates/ruvector-collections` | Established |
|
||||
| Lemire `fastmod` | already vendored in tree | Reusable |
|
||||
| HNSW adjacency abstraction | `crates/micro-hnsw-wasm` | Existing |
|
||||
| Shard router using XXH3-64 | `crates/ruvector-graph/src/distributed/` | ADR-058 #6 |
|
||||
| Pi-brain witness payload | `crates/mcp-brain-server` | XXH3 only |
|
||||
| Sparsifier samplers | `crates/ruvector-sparsifier/src/sampler.rs` | Power-of-2 |
|
||||
| LSH sketch (mincut attention) | `crates/ruvector-attn-mincut` | Hand-picked p |
|
||||
|
||||
### What We Will Build
|
||||
|
||||
| Item | Owner | Phase |
|
||||
|---------------------------------------------------------|--------------|-------|
|
||||
| `primality.rs` + benches + property tests | core | 0 |
|
||||
| `PRIMES_BELOW_2K` / `PRIMES_ABOVE_2K` tables + `build.rs` regen + CI cross-check vs MR | core | 0 |
|
||||
| Shard-router `--feature prime-shard` switch (uses table fast path) | distributed | 1 |
|
||||
| HNSW prime-bucket capacity strategy (uses table fast path) | hnsw | 2 |
|
||||
| Certified-prime LSH modulus (`p = next_prime(universe)`, general MR path) | sketches | 3 |
|
||||
| Witness-chain `Option<EphemeralPrimeFingerprint>` field (general MR path) | brain | 4 |
|
||||
| Optional: prime-cardinality PQ codebooks | cnn / quant | 5 |
|
||||
|
||||
### Generation Strategy: Table Fast Path + Miller-Rabin Fallback
|
||||
|
||||
Three of the five integration sites (shard router, HNSW buckets,
|
||||
sparsifier strides) request primes near **fixed power-of-two sizes**
|
||||
that never change between releases. For these we ship a static table
|
||||
of "largest prime < 2^k" for k ∈ [8, 64] (~456 bytes, ~1 KB combined
|
||||
with the symmetric `_ABOVE_` table) and route those calls to a single
|
||||
L1-cached load — **zero Miller-Rabin work at runtime**.
|
||||
|
||||
The two unpredictable sites (LSH universe, witness ephemeral primes)
|
||||
fall through to the general Miller-Rabin descent path at ~250 ns per
|
||||
call. Both are cold paths (index-build time and per-share, respectively).
|
||||
|
||||
Crucially, **Miller-Rabin remains the source of truth.** The tables are
|
||||
generated by a `build.rs` script that calls the MR implementation, and
|
||||
a `#[test]` re-validates every entry under `cargo test`. The table is
|
||||
an *amortization* of MR to compile time, not a replacement for it.
|
||||
|
||||
This refinement keeps the proposal's runtime cost honest: PIAL adds
|
||||
≤ 1 ns to the hottest paths (shard routing, HNSW probe sequences) and
|
||||
~250 ns to the coldest paths (one-shot index build, per-share fingerprint).
|
||||
|
||||
### Determinism Guarantees
|
||||
|
||||
| Range | Witnesses | Result |
|
||||
|--------------|---------------------------------------------------|-----------------|
|
||||
| `n < 2^32` | 2, 7, 61 | Deterministic |
|
||||
| `n < 2^64` | 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37 | Deterministic |
|
||||
| `n < 2^128` | 40 random rounds | Pr[err] < 2⁻⁸⁰ |
|
||||
|
||||
Tests will pin every documented "hard" pseudoprime (e.g. 3215031751,
|
||||
2152302898747) so the deterministic guarantee is regression-protected.
|
||||
|
||||
### Hot-Path Avoidance
|
||||
|
||||
Modulo-by-prime is a hardware *division* and would dominate any inner loop
|
||||
that runs it per-element. To avoid this we will:
|
||||
|
||||
1. Compute the prime **once** per shard-rebalance / index-build.
|
||||
2. Wrap it in **Lemire fastmod** (`u64 → u32` reduction with one multiply
|
||||
and one shift) so the per-element cost matches `& mask` to within ~1 ns.
|
||||
3. Cache the fastmod constants alongside the modulus in the shard / HNSW /
|
||||
LSH structures.
|
||||
|
||||
This is what makes prime moduli cheap enough to use *everywhere*; without
|
||||
fastmod the proposal would not pencil out.
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
- **Closes ADR-058 finding #6** without the cost of switching the primary
|
||||
hash function.
|
||||
- Restores the **2-independence guarantee** of the LSH families used by
|
||||
sparsifier and mincut attention — these were silently degraded.
|
||||
- Gives the pi-brain witness chain a **second, cheap-to-add line of defense**
|
||||
with per-share entropy, addressing a long-standing gap.
|
||||
- Adds a small, broadly useful **building block** to
|
||||
`ruvector-collections` that has zero new external dependencies.
|
||||
- All work is **tier-1 / tier-2** under ADR-026 — no Opus tokens needed for
|
||||
the bulk of the implementation.
|
||||
|
||||
### Negative
|
||||
|
||||
- Five integration sites must each be reviewed and benchmarked. The PRD's
|
||||
staged rollout is mandatory — a big-bang merge would be hard to reason
|
||||
about.
|
||||
- Modulo-by-prime is slower than mask if `fastmod` is forgotten. We mitigate
|
||||
by *requiring* fastmod in the integration patches and gating CI on a
|
||||
micro-benchmark that catches the regression.
|
||||
- WASM `u128` is ~5× slower than native; the `u128` mode is therefore
|
||||
opt-in and will be cfg-gated out of WASM bundles by default.
|
||||
- The witness-chain change is wire-format-adjacent. We make it a backward
|
||||
compatible `Option<…>` field; verifiers must accept payloads that lack it.
|
||||
|
||||
### Neutral / Followups
|
||||
|
||||
- Future work could explore Lucas–Lehmer for explicitly Mersenne-shaped
|
||||
moduli (e.g. `2^61 − 1`) — a separate ADR if benchmarks warrant.
|
||||
- A `PrimeModHash<H>` newtype wrapper is the most likely next abstraction;
|
||||
we'll prototype it in Phase 1 and decide.
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
| Option | Why rejected |
|
||||
|-----------------------------------------------------|--------------------------------------------------------------------|
|
||||
| Use `num-prime` or `glass_pumpkin` crate | New external dep, allocates, > 100 KB WASM cost |
|
||||
| Hard-code a static table of "good" primes | Doesn't adapt to runtime resharding; exhausted at 2³² |
|
||||
| Switch shard hash to BLAKE3 (cryptographic) | 8–10× slower than XXH3; ADR-058 already declined this |
|
||||
| Probabilistic-only Miller-Rabin everywhere | Unnecessary uncertainty in the hot path; deterministic is free |
|
||||
| Build a new `ruvector-primes` crate | Adds a 61st workspace crate for ~250 lines of code; not worth it |
|
||||
| Do nothing | Leaves five known-bad subsystems on the floor |
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- Miller-Rabin alone is **not** a cryptographic prime generator; we never
|
||||
claim it as one. The witness-chain use (§4.4 of the PRD) layers it
|
||||
*alongside* an existing XXH3 fingerprint and a future TEE-backed
|
||||
signature (ADR-042) — defense in depth, not standalone integrity.
|
||||
- Per-share ephemeral primes are derived from `SHA256(payload)[0..8]` so
|
||||
they cannot be precomputed by an attacker who has not seen the payload.
|
||||
An attacker who *has* seen the payload still needs to forge the original
|
||||
XXH3 fingerprint as well, which is the existing security baseline.
|
||||
- The `u128` probabilistic mode is **never** exposed to externally-supplied
|
||||
numbers in default builds; it is gated behind `--feature unstable-u128`.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
A reviewer should be able to verify ADR-151 is "Done" when:
|
||||
|
||||
1. `cargo test -p ruvector-collections primality` is green and includes
|
||||
pinned-pseudoprime regressions (e.g. 3215031751, 2152302898747).
|
||||
2. `cargo test -p ruvector-collections --test table_cross_check`
|
||||
re-validates **every entry** of `PRIMES_BELOW_2K` and
|
||||
`PRIMES_ABOVE_2K` against the Miller-Rabin descent, confirming the
|
||||
table is consistent with the source-of-truth implementation.
|
||||
3. `cargo bench -p ruvector-collections primality` reports
|
||||
`is_prime_u64 ≤ 50 ns`, `prev_prime_below_pow2 ≤ 1 ns` (table fast
|
||||
path), and `next_prime_u64(arbitrary N) ≤ 2 µs` (general MR path) on
|
||||
M-series.
|
||||
4. ruvector-graph shard router under `--feature prime-shard` shows
|
||||
≥ 30% reduction in shard-load std-dev on the Zipfian micro-bench.
|
||||
5. micro-hnsw-wasm p99 insert latency at 1 M vectors drops by ≥ 15%.
|
||||
6. The pi-brain `brain_share` payload tolerates *both* presence and
|
||||
absence of the new ephemeral-prime field across two release versions.
|
||||
7. WASM bundle size growth: `micro-hnsw-wasm` ≤ +2 KB, `mcp-brain-server`
|
||||
≤ +1.5 KB, prime tables ≤ +1 KB total.
|
||||
|
||||
---
|
||||
|
||||
## Phase 0 Findings (2026-04-16)
|
||||
|
||||
Phase 0 (the standalone primality utility in `ruvector-collections`) landed
|
||||
with all correctness gates green, two of four performance targets met, and a third (`next_prime_u64` on arbitrary input) missed by 11%.
|
||||
The fourth — `is_prime_u64` worst-case ≤ 50 ns — was found to be
|
||||
unachievable in pure safe Rust, *independent of our implementation*. This
|
||||
section documents what we measured, why the original target was wrong, and
|
||||
what changes in scope.
|
||||
|
||||
### What landed
|
||||
|
||||
- `src/primality_kernel.rs` — shared MR core, `include!`d by both
|
||||
`build.rs` and `src/primality.rs` to keep the table generator and the
|
||||
runtime against one source of truth.
|
||||
- `src/primality.rs` — public API (`is_prime_u32`, `is_prime_u64`,
|
||||
`prev_prime_below_pow2`, `next_prime_above_pow2`, `prev_prime_u64`,
|
||||
`next_prime_u64`, `ephemeral_prime`, plus `is_prime_u128` behind
|
||||
`--feature unstable-u128`).
|
||||
- `build.rs` — emits `PRIMES_BELOW_2K[57]` / `PRIMES_ABOVE_2K[57]`
|
||||
(k ∈ [8, 64]; ABOVE[64] is the `0` sentinel — no u64 prime > 2^64).
|
||||
- `tests/primality_pseudoprimes.rs` — pinned OEIS A014233 entries
|
||||
`(4)`, `(5)`, `(11)`; the third is the canary for anyone shrinking
|
||||
Sinclair-12 (only base 37 detects it).
|
||||
- `tests/table_cross_check.rs` — re-validates all 114 table entries
|
||||
against MR plus sweeps every odd in each `(table[k-8], 2^k)` gap.
|
||||
Runtime: ~milliseconds (the *gap* is small — typically ≤ 100 odds).
|
||||
- `benches/primality.rs` — four criterion benches per PRD §6.
|
||||
|
||||
### Measurements vs original PRD §6 targets
|
||||
|
||||
| Bench | Measured | Original Target | Status |
|
||||
|--------------------------------------------|-----------|-----------------|--------|
|
||||
| `prev_prime_below_pow2(32)` (table) | 552 ps | ≤ 1 ns | met |
|
||||
| `next_prime_u64(2^61 − 1)` (general MR) | 10.97 µs | ≤ 12 µs | met |
|
||||
| `next_prime_u64(arbitrary ≈ 1e9)` | 2.23 µs | ≤ 2 µs | +11% |
|
||||
| `is_prime_u64(u64::MAX − 58)` worst-case | 15.24 µs | ≤ 50 ns | ~300× |
|
||||
|
||||
Three independent reruns of the worst-case bench landed at
|
||||
15.24 / 15.79 / 15.65 µs — stable within ±2%, not measurement noise.
|
||||
|
||||
### Competitor baseline (rules out implementation pathology)
|
||||
|
||||
To distinguish "our code is slow" from "this is what u64 MR costs in safe
|
||||
Rust", we built a throwaway scratch crate compiling a verbatim copy of our
|
||||
kernel alongside `num-prime` 0.4.4. Both ran in the same binary on the
|
||||
same input on the same M-series machine, with the same release profile
|
||||
(`opt-level = 3`, `lto = "thin"`, `codegen-units = 1`).
|
||||
|
||||
| Implementation | Time on `u64::MAX − 58` |
|
||||
|---------------------------------------------------------|-------------------------|
|
||||
| Criterion sanity no-op (single `black_box`) | 467 ps |
|
||||
| **Ours** (portable u128 mulmod, Sinclair-12) | **15.63 µs** |
|
||||
| **`num-prime` 0.4.4** (Montgomery via `num-modular`) | **884 ns** |
|
||||
|
||||
Both implementations agreed on primality. The 467 ps sanity baseline
|
||||
confirms criterion is reporting honestly. Conclusions:
|
||||
|
||||
1. The 15.63 µs measurement is real, not a tooling artifact.
|
||||
2. There is a **17.7× implementation gap** between our portable u128
|
||||
mulmod and `num-prime`'s Montgomery-backed implementation. This is
|
||||
the single recoverable optimization in pure safe Rust.
|
||||
3. `num-prime` itself is **17.7× over the original 50 ns target**. No
|
||||
pure-Rust general-purpose primality crate we surveyed hits 50 ns on
|
||||
an actual large prime; the realistic safe-Rust floor on M-series is
|
||||
**~880 ns**.
|
||||
4. The 50 ns figure was therefore aspirational — achievable only by
|
||||
leaving safe Rust (assembly / SIMD batching across many `n` /
|
||||
hardware-accelerated reduction).
|
||||
|
||||
### Revised performance targets
|
||||
|
||||
PRD §6 is amended in the same PR. The relevant row changes:
|
||||
|
||||
| Operation | M-series (was → now) | WASM (was → now) |
|
||||
|--------------------------------------------|----------------------|------------------|
|
||||
| `is_prime_u64(p)` worst-case | 50 ns → **≤ 1 µs** | 200 ns → **≤ 4 µs** |
|
||||
|
||||
The new target tracks the measured `num-prime` floor (884 ns) with ~13% headroom
|
||||
for variance. All other §6 rows remain unchanged. The current 15.24 µs
|
||||
implementation does not meet the new target either — Phase 0.1 closes the
|
||||
gap (see below).
|
||||
|
||||
### Phase 0.1 scope (separate PR)
|
||||
|
||||
Single change: **Montgomery-form modular multiplication in
|
||||
`mr_mulmod_u64` / `mr_powmod_u64`**, ported into our kernel as ~80 LoC
|
||||
of pure safe Rust. Expected speedup 15-18× → lands at the ~880 ns floor.
|
||||
Validation: criterion bench requires mean ≤ 1.0 µs with `p < 0.01`
|
||||
vs the Phase 0 baseline. No change to the public API or the table /
|
||||
cross-check architecture.
|
||||
|
||||
### Explicitly rejected from Phase 0.1
|
||||
|
||||
- **The 7-witness "Sinclair" set** `{2, 325, 9375, 28178, 450775,
|
||||
9780504, 1795265022}`. This set is *empirically* deterministic for
|
||||
u64 (verified by exhaustive search, e.g. miller-rabin.appspot.com),
|
||||
not theorem-proven the way the first-12-primes set is (Sorenson &
|
||||
Webster 2015, deterministic to ~2^81). Trading textbook provenance
|
||||
for a 1.7× speedup is a bad deal when Montgomery alone gives
|
||||
15-18×. Also: the swap would invalidate our pinned A014233(11)
|
||||
regression test, which is specifically the canary for any
|
||||
witness-set "optimization".
|
||||
- **Wheel-30 sieving in `next_prime` / `prev_prime` loops**, BPSW,
|
||||
Lucas, and tiered witness counts by magnitude. All sound but not
|
||||
on the Phase 0.1 critical path. Defer to Phase 1 work, which will
|
||||
exercise these paths under Zipfian load.
|
||||
|
||||
### Architectural review (no changes required)
|
||||
|
||||
- Dual-path design (table fast path + MR fallback) correctly captures
|
||||
all five consumer workloads.
|
||||
- `tests/table_cross_check.rs` is sufficient as the source-of-truth gate;
|
||||
the `0.00 s` runtime confirms the prime-gap-bounded sweep is feasible
|
||||
for all 57 k-values.
|
||||
- `include!` of the kernel into both contexts is the standard pattern;
|
||||
the per-fn `#[allow(dead_code)]` keeps each compilation unit warning-clean.
|
||||
- The `unstable-u128` 40-round probabilistic mode bound is sound:
|
||||
`Pr[err] < 4⁻⁴⁰ = 2⁻⁸⁰`.
|
||||
|
||||
---
|
||||
|
||||
## Notes for Reviewers
|
||||
|
||||
This ADR's *creative* contribution is not Miller-Rabin itself (textbook,
|
||||
1976) — it is the observation that **one tiny utility unlocks five
|
||||
independently identified pathologies** across hashing, sharding, sketching,
|
||||
adjacency, and witnessing in a workspace that today has no primality
|
||||
infrastructure at all. The PRD goes deeper on each use-case; this ADR
|
||||
binds the architectural choices.
|
||||
424
docs/research/miller-rabin-optimizations/GROK-REVIEW-REQUEST.md
Normal file
424
docs/research/miller-rabin-optimizations/GROK-REVIEW-REQUEST.md
Normal file
|
|
@ -0,0 +1,424 @@
|
|||
# External Review Request — PIAL Phase 0 (Miller-Rabin Primality)
|
||||
|
||||
You are an objective reviewer of a freshly-landed Phase-0 PR in a Rust workspace
|
||||
(`ruvector`). The PR adds a deterministic Miller-Rabin primality utility plus
|
||||
build-time prime tables. Two of four bench targets are met, one is missed by 11%, and one is missed by
|
||||
~300×. The team needs an objective plan that:
|
||||
|
||||
1. Sanity-checks correctness (we may have blind spots).
|
||||
2. Proposes ranked optimizations for the missed target — with a *measurement
|
||||
methodology* for each, not just claims.
|
||||
3. Identifies any architectural concerns we are missing.
|
||||
|
||||
Constraints we cannot relax:
|
||||
- **Pure Rust, `core`-only.** No external prime/big-integer crates (`num-prime`,
|
||||
`glass_pumpkin`, etc. were rejected in the binding ADR).
|
||||
- **Allocation-free, `no_std`-friendly.** Hot paths run in WASM bundles.
|
||||
- **Sinclair-12 witnesses are non-negotiable** for the deterministic u64 path
|
||||
unless you can cite a smaller deterministic set proven for `n < 2^64`.
|
||||
- **Source-of-truth invariant**: build-time tables and runtime tests must be
|
||||
generated by *the same* MR implementation. Don't propose schemes that fork
|
||||
the truth source.
|
||||
|
||||
---
|
||||
|
||||
## 1. Binding context (ADR-151 summary)
|
||||
|
||||
Five subsystems in a 60+-crate workspace need prime moduli (shard router, HNSW
|
||||
adjacency, sparsifier strides, mincut LSH, pi-brain witness chain). Today they
|
||||
all use `mod 2^k` and have documented pathologies. ADR-151 introduces *one*
|
||||
shared utility — `crates/ruvector-collections/src/primality.rs` — that all five
|
||||
will adopt across phases 1–5 (this PR is Phase 0 only: the utility itself).
|
||||
|
||||
Design:
|
||||
- Deterministic MR for `u32` (witnesses {2, 7, 61}) and `u64` (Sinclair-12:
|
||||
{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}).
|
||||
- Probabilistic MR for `u128` behind `--feature unstable-u128`, 40 rounds,
|
||||
error `< 2⁻⁸⁰`.
|
||||
- **Dual path**: a build-time-generated table `PRIMES_BELOW_2K[57]` /
|
||||
`PRIMES_ABOVE_2K[57]` covers k ∈ [8, 64] for power-of-two-aligned callers
|
||||
(~1 ns table load); arbitrary inputs fall through to the general MR descent.
|
||||
- The table is generated by `build.rs` calling the *same* MR kernel that ships
|
||||
at runtime (via `include!`). A `tests/table_cross_check.rs` re-validates
|
||||
every entry under `cargo test`, so MR remains source of truth.
|
||||
|
||||
Acceptance gates:
|
||||
1. `cargo test -p ruvector-collections primality` green, including pinned
|
||||
pseudoprime regressions (OEIS A014233 entries 4, 5, 11).
|
||||
2. Table cross-check validates all 114 entries against MR.
|
||||
3. Bench targets met:
|
||||
- `is_prime_u64` worst-case ≤ 50 ns (M-series), ≤ 200 ns (WASM)
|
||||
- `prev_prime_below_pow2(k)` ≤ 1 ns (table)
|
||||
- `next_prime_u64(arbitrary)` ≤ 2 µs
|
||||
- `next_prime_u64(2^61)` ≤ 12 µs
|
||||
|
||||
---
|
||||
|
||||
## 2. The implementation as shipped
|
||||
|
||||
### 2.1 Shared kernel — `src/primality_kernel.rs`
|
||||
|
||||
`include!`d into both `build.rs` and `src/primality.rs`. Inner attributes are
|
||||
disallowed in `include!`d files, hence per-fn `#[allow(dead_code)]`.
|
||||
|
||||
```rust
|
||||
// Deterministic Miller-Rabin kernel — ADR-151 (PIAL).
|
||||
//
|
||||
// `include!`d into two contexts (build.rs and src/primality.rs) which use
|
||||
// different subsets of the symbols. Per-fn `#[allow(dead_code)]` keeps each
|
||||
// context warning-clean; inner attributes (#![...]) aren't legal in
|
||||
// included files.
|
||||
|
||||
// Computes (a * b) mod m.
// Both operands are widened to u128 first, so the product is at most
// (2^64 - 1)^2 < 2^128 and `wrapping_mul` never actually wraps. The result
// is < m and therefore fits back into u64.
// `m` must be non-zero: the `%` panics on a zero divisor.
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_mulmod_u64(a: u64, b: u64, m: u64) -> u64 {
|
||||
// u128 product avoids overflow without allocation.
|
||||
((a as u128).wrapping_mul(b as u128) % (m as u128)) as u64
|
||||
}
|
||||
|
||||
// Computes base^exp mod m by right-to-left square-and-multiply: each set
// bit of `exp` multiplies the accumulator by the current power of `base`,
// and `base` is squared between bits.
// Returns 0 when m == 1 (everything is congruent to 0 mod 1), so
// base^0 yields 1 only for m > 1. `m == 0` panics at `base %= m`.
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_powmod_u64(mut base: u64, mut exp: u64, m: u64) -> u64 {
|
||||
if m == 1 {
|
||||
return 0;
|
||||
}
|
||||
let mut acc: u64 = 1;
|
||||
base %= m;
|
||||
while exp > 0 {
|
||||
if exp & 1 == 1 {
|
||||
acc = mr_mulmod_u64(acc, base, m);
|
||||
}
|
||||
exp >>= 1;
|
||||
// Skip the squaring after the last bit: its result would be unused.
if exp > 0 {
|
||||
base = mr_mulmod_u64(base, base, m);
|
||||
}
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
// Returns true iff `a` is a Miller-Rabin witness of compositeness for `n`.
|
||||
// Caller guarantees: n is odd, n > 3, and a in [2, n-2]. n - 1 = d * 2^s
|
||||
// with d odd (passed in pre-decomposed for speed).
|
||||
// Returns false ("n may be prime for this base") when a^d is 1 or n-1
// mod n, or when one of the following squarings reaches n-1.
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_is_composite_witness(n: u64, d: u64, s: u32, a: u64) -> bool {
|
||||
let mut x = mr_powmod_u64(a, d, n);
|
||||
if x == 1 || x == n - 1 {
|
||||
return false;
|
||||
}
|
||||
// a^d was checked above; the loop covers a^(d*2^r) for r = 1..=s-1.
// saturating_sub keeps the range empty instead of underflowing if s == 0.
for _ in 0..s.saturating_sub(1) {
|
||||
x = mr_mulmod_u64(x, x, n);
|
||||
if x == n - 1 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
// Primality test for u64.
// Step 1: trial division by the twelve primes in SMALL_PRIMES, which also
//         answers every n <= 37 directly.
// Step 2: Miller-Rabin with those same twelve values as bases.
// The surrounding document states this base set is deterministic for all
// n < 2^64, so `true` means "prime", not "probably prime".
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_is_prime_u64(n: u64) -> bool {
|
||||
if n < 2 {
|
||||
return false;
|
||||
}
|
||||
const SMALL_PRIMES: [u64; 12] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
|
||||
for &p in &SMALL_PRIMES {
|
||||
if n == p {
|
||||
return true;
|
||||
}
|
||||
if n.is_multiple_of(p) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// n is now odd, > 37, and coprime to every Sinclair witness.
|
||||
// Decompose n - 1 = d * 2^s with d odd; n is odd here, so s >= 1.
let mut d = n - 1;
|
||||
let mut s: u32 = 0;
|
||||
while d & 1 == 0 {
|
||||
d >>= 1;
|
||||
s += 1;
|
||||
}
|
||||
// Any single base proving compositeness is enough to reject n.
for &a in &SMALL_PRIMES {
|
||||
if mr_is_composite_witness(n, d, s, a) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
// Primality test for u32: widens to u64 and delegates to `mr_is_prime_u64`.
// NOTE(review): this runs the full twelve-base test; the reduced
// {2, 7, 61} base set described in the prose is not used here.
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_is_prime_u32(n: u32) -> bool {
|
||||
mr_is_prime_u64(n as u64)
|
||||
}
|
||||
|
||||
// Returns the largest prime strictly less than `upper`, or 0 when there is
// none (upper <= 2). Scans downward over odd candidates only.
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_prev_prime_u64(upper: u64) -> u64 {
|
||||
if upper <= 2 { return 0; }
|
||||
if upper == 3 { return 2; }
|
||||
// upper >= 4 here, so the first candidate is >= 3.
let mut n = upper - 1;
|
||||
if n & 1 == 0 { n -= 1; }
|
||||
loop {
|
||||
if mr_is_prime_u64(n) { return n; }
|
||||
// Defensive stop so `n -= 2` cannot underflow; 3 is prime, so the
// line above returns before this is reached for n == 3.
if n <= 3 { return 2; }
|
||||
n -= 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the smallest prime strictly greater than `lower`, or 0 when
// lower >= u64::MAX - 58 (no larger prime is representable in u64).
// Scans upward over odd candidates only.
#[inline]
|
||||
#[allow(dead_code)]
|
||||
fn mr_next_prime_u64(lower: u64) -> u64 {
|
||||
if lower < 2 { return 2; }
|
||||
if lower < 3 { return 3; }
|
||||
// Relies on u64::MAX - 58 being prime (as the name asserts): the scan
// below then stops there at the latest, before `n += 2` can overflow.
let largest_u64_prime: u64 = u64::MAX - 58;
|
||||
if lower >= largest_u64_prime { return 0; }
|
||||
let mut n = lower + 1;
|
||||
if n & 1 == 0 { n += 1; }
|
||||
loop {
|
||||
if mr_is_prime_u64(n) { return n; }
|
||||
n += 2;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2.2 Public API — relevant excerpts from `src/primality.rs`
|
||||
|
||||
```rust
|
||||
include!("primality_kernel.rs");
|
||||
include!(concat!(env!("OUT_DIR"), "/prime_tables.rs"));
|
||||
// ↑ provides: pub const PRIMES_BELOW_2K: [u64; 57]
|
||||
// pub const PRIMES_ABOVE_2K: [u64; 57] (last entry = 0 sentinel)
|
||||
|
||||
/// Returns `true` iff `n` is prime. Thin public wrapper over the kernel's
/// `mr_is_prime_u32`.
#[inline]
|
||||
pub fn is_prime_u32(n: u32) -> bool { mr_is_prime_u32(n) }
|
||||
|
||||
/// Returns `true` iff `n` is prime. Thin public wrapper over the kernel's
/// `mr_is_prime_u64`.
#[inline]
|
||||
pub fn is_prime_u64(n: u64) -> bool { mr_is_prime_u64(n) }
|
||||
|
||||
/// Table lookup: returns `PRIMES_BELOW_2K[k - 8]`, the precomputed entry
/// for `2^k`.
///
/// `k` must be in `8..=64`. The range is enforced only by `debug_assert!`;
/// in release builds the assertion is compiled out and an out-of-range `k`
/// reaches the array index directly.
#[inline]
|
||||
pub fn prev_prime_below_pow2(k: u32) -> u64 {
|
||||
debug_assert!((8..=64).contains(&k));
|
||||
PRIMES_BELOW_2K[(k - 8) as usize]
|
||||
}
|
||||
|
||||
/// Table lookup: returns `PRIMES_ABOVE_2K[k - 8]`, the precomputed entry
/// for `2^k`.
///
/// `k` must be in `8..=63`. The range is enforced only by `debug_assert!`;
/// in release builds the assertion is compiled out, so `k == 64` would
/// return the table's last slot (the `0` sentinel) rather than a prime.
#[inline]
|
||||
pub fn next_prime_above_pow2(k: u32) -> u64 {
|
||||
debug_assert!((8..=63).contains(&k));
|
||||
PRIMES_ABOVE_2K[(k - 8) as usize]
|
||||
}
|
||||
|
||||
/// Largest prime strictly less than `n` (0 if there is none).
///
/// When `n` is an exact power of two `2^k` with `k >= 8`, the answer is
/// read from `PRIMES_BELOW_2K`; every other input falls through to the
/// Miller-Rabin descent in `mr_prev_prime_u64`.
#[inline]
|
||||
pub fn prev_prime_u64(n: u64) -> u64 {
|
||||
if n.is_power_of_two() {
|
||||
let k = n.trailing_zeros();
|
||||
// A u64 power of two has at most 63 trailing zeros, so the k == 64
// end of this range is never hit from here.
if (8..=64).contains(&k) {
|
||||
return PRIMES_BELOW_2K[(k - 8) as usize];
|
||||
}
|
||||
}
|
||||
mr_prev_prime_u64(n)
|
||||
}
|
||||
|
||||
/// Smallest prime strictly greater than `n`, or 0 if none fits in `u64`.
///
/// When `n` is an exact power of two `2^k` with `8 <= k <= 63`, the answer
/// is read from `PRIMES_ABOVE_2K`; every other input falls through to the
/// Miller-Rabin ascent in `mr_next_prime_u64`.
#[inline]
|
||||
pub fn next_prime_u64(n: u64) -> u64 {
|
||||
if n.is_power_of_two() {
|
||||
let k = n.trailing_zeros();
|
||||
if (8..=63).contains(&k) {
|
||||
return PRIMES_ABOVE_2K[(k - 8) as usize];
|
||||
}
|
||||
}
|
||||
mr_next_prime_u64(n)
|
||||
}
|
||||
|
||||
/// Maps `seed` to a prime: the seed is forced odd, reduced to its low
/// 61 bits, and returned as-is if already prime; otherwise the next prime
/// above it is returned.
#[inline]
|
||||
pub fn ephemeral_prime(seed: u64) -> u64 {
|
||||
// Used by pi-brain witness chain (PRD §4.4) for per-share entropy.
|
||||
let mask = (1u64 << 61) - 1;
|
||||
// `| 1` sets the low bit (odd candidate); `& mask` keeps s <= 2^61 - 1.
let s = (seed | 1) & mask;
|
||||
if mr_is_prime_u64(s) { s } else { mr_next_prime_u64(s) }
|
||||
}
|
||||
|
||||
// u128 probabilistic mode (cfg-gated on `unstable-u128`):
|
||||
#[cfg(feature = "unstable-u128")]
|
||||
pub fn is_prime_u128(n: u128, rounds: u8) -> bool { /* … 40-round MR with
|
||||
a tiny seeded LCG for witness selection; deferred to mr_is_prime_u64
|
||||
when n <= u64::MAX */ }
|
||||
```
|
||||
|
||||
### 2.3 Tests asserting correctness
|
||||
|
||||
- `tests/primality_pseudoprimes.rs`:
|
||||
- `is_prime_u64(3_215_031_751) == false` (OEIS A014233(4), spsp to {2,3,5,7})
|
||||
- `is_prime_u64(2_152_302_898_747) == false` (A014233(5))
|
||||
- `is_prime_u64(3_825_123_056_546_413_051) == false` (A014233(11), detected
|
||||
only by base 37 — canary for anyone shrinking Sinclair-12)
|
||||
- All primes/composites in [0, 100], 7 Carmichael numbers, edges
|
||||
(0, 1, u64::MAX, u64::MAX − 58, largest u32 prime).
|
||||
- `tests/table_cross_check.rs`:
|
||||
- For each k ∈ [8, 64]: assert `is_prime_u64(PRIMES_BELOW_2K[k-8])` and
|
||||
sweep every odd integer in `(table[k-8], 2^k)` asserting non-primality.
|
||||
- Symmetric for k ∈ [8, 63] on `PRIMES_ABOVE_2K`.
|
||||
- Sentinel: `PRIMES_ABOVE_2K[64-8] == 0`.
|
||||
|
||||
---
|
||||
|
||||
## 3. Measurements (criterion, M-series, release profile)
|
||||
|
||||
### 3.1 Phase-0 benches against the PRD targets
|
||||
|
||||
| Bench | Measured | Target | Status |
|
||||
|--------------------------------------------|-----------|---------|--------|
|
||||
| `prev_prime_below_pow2(32)` (table) | 552 ps | ≤ 1 ns | green |
|
||||
| `next_prime_u64(2^61 − 1)` general MR | 10.97 µs | ≤ 12 µs | green |
|
||||
| `next_prime_u64(arbitrary ≈ 1e9)` general | 2.23 µs | ≤ 2 µs | +11% |
|
||||
| `is_prime_u64(u64::MAX − 58)` worst-case | **15.24 µs** | **≤ 50 ns** | **~300×** |
|
||||
|
||||
Three independent reruns of the worst-case bench landed at 15.24 / 15.79 /
|
||||
15.65 µs — stable within ±2%, not measurement noise.
|
||||
|
||||
### 3.2 Apples-to-apples competitor baseline
|
||||
|
||||
To rule out "this machine is slow today" or "criterion is mismeasuring", we
|
||||
built a throwaway scratch crate (outside the workspace) that compiles a
|
||||
verbatim copy of our kernel alongside `num-prime` 0.4.4. Both run in the
|
||||
same binary on the same input, with the same release profile
|
||||
(`opt-level = 3`, `lto = "thin"`, `codegen-units = 1`).
|
||||
|
||||
| Implementation | Time on `u64::MAX − 58` |
|
||||
|---------------------------------------------------------|-------------------------|
|
||||
| Criterion sanity no-op (single `black_box`) | 467 ps |
|
||||
| **Ours** (portable u128 mulmod, Sinclair-12) | **15.63 µs** |
|
||||
| **`num-prime` 0.4.4** (Montgomery via `num-modular`) | **884 ns** |
|
||||
| PRD §6 target | 50 ns |
|
||||
|
||||
Both implementations agreed on primality (no correctness gap). The 467 ps
|
||||
sanity baseline confirms criterion is reporting honestly — broken benches
|
||||
don't produce 467 ps for a no-op.
|
||||
|
||||
**What this tells us:**
|
||||
|
||||
1. **Our 15.63 µs is real and reproducible**, not a measurement artifact.
|
||||
2. **We are ~17.7× slower than `num-prime`** on the same input. The
|
||||
delta is almost certainly Montgomery-form modular multiplication
|
||||
(`num-prime` pulls `num-modular`, which provides exactly that).
|
||||
3. **`num-prime` itself is ~17.7× slower than the 50 ns target.** No
|
||||
pure-Rust general-purpose primality crate we know of hits 50 ns on an
|
||||
actual large prime; the realistic safe-Rust floor on M-series appears
|
||||
to be ~880 ns.
|
||||
4. The PRD's 50 ns figure is therefore *unachievable* in safe Rust — it
|
||||
would require Montgomery + assembly / SIMD batching across many `n` /
|
||||
leaving the safe subset entirely.
|
||||
|
||||
ADR-151 forbids `num-prime` as a *runtime* dependency, but does not forbid
|
||||
us from porting Montgomery into our own kernel — `num-modular` is
|
||||
MIT/Apache and the technique itself is textbook. That is now a *measured*
|
||||
optimization target with a known ceiling, not a guess.
|
||||
|
||||
---
|
||||
|
||||
## 4. What we are asking you to do
|
||||
|
||||
Produce **one document** with the four sections below. Be specific. Cite
|
||||
sources where possible. Do not propose changes that would violate the
|
||||
constraints in the preamble.
|
||||
|
||||
### Section A — Correctness audit
|
||||
|
||||
Read §2.1 and §2.2. Identify:
|
||||
1. Any soundness bug (a composite that would be reported prime, or vice
|
||||
versa) within the documented input ranges.
|
||||
2. Edge cases not covered by the tests in §2.3 that you would add.
|
||||
3. Any way the table cross-check could pass while masking a real bug
|
||||
(i.e. is the test actually load-bearing?).
|
||||
4. Risks specific to `ephemeral_prime`'s seed → prime mapping when used
|
||||
for witness-chain fingerprinting (ADR §4.4): collisions, attacker
|
||||
precomputation, distribution issues.
|
||||
|
||||
### Section B — Performance plan, ranked
|
||||
|
||||
The sharpened goal, given §3.2's competitor baseline: **close the 17.7×
|
||||
gap to `num-prime` (15.63 µs → ~880 ns) in pure safe Rust, without taking
|
||||
`num-prime` or `num-modular` as a runtime dependency**, AND hit the
|
||||
`next_prime_u64` arbitrary 2 µs target. Treat 50 ns as aspirational; we
|
||||
expect you to recommend a revised PRD target with justification.
|
||||
|
||||
For each proposal:
|
||||
- **Mechanism**: what changes in code (one paragraph, no hand-waving;
|
||||
reference §2.1 line ranges where applicable).
|
||||
- **Expected speedup vs our 15.63 µs baseline**: cite source or give a
|
||||
back-of-envelope; if the technique is what `num-prime` uses, say so.
|
||||
- **Cost**: code complexity (LoC, conceptual difficulty for reviewers),
|
||||
WASM bundle size, any new `unsafe`.
|
||||
- **Compatibility**: does it preserve the source-of-truth invariant
|
||||
(build.rs and runtime use the same kernel via `include!`)? Does it
|
||||
break `no_std`?
|
||||
- **Validation methodology**: the *exact* benchmark and regression test
|
||||
you would add to prove the speedup is real and stable, including
|
||||
the criterion config you would use and the statistical threshold for
|
||||
declaring "passed".
|
||||
|
||||
Rank proposals by `expected_speedup × feasibility / complexity`.
|
||||
|
||||
Candidate techniques to consider (extend or reject as you see fit):
|
||||
- **Montgomery-form modular arithmetic** — likely the single biggest
|
||||
lever based on the `num-prime` comparison. We want a concrete sketch
|
||||
of the API change and a LoC estimate for porting it into our kernel.
|
||||
- Wheel factorization (mod 30 / mod 210) for the small-prime screen.
|
||||
- Branchless witness loops.
|
||||
- Reduced witness sets for sub-ranges (e.g. {2} for n < 2047,
|
||||
{2, 3} for n < 1.4 × 10^9).
|
||||
- Strong-base early-exit ordering (which witness fails fastest on
|
||||
random composites?).
|
||||
- BPSW (Baillie-PSW) instead of MR — different correctness story; we'd
|
||||
need a citation for deterministic-up-to-2^64 status.
|
||||
- Strong Lucas as a deterministic add-on.
|
||||
- Pre-screen by Pollard rho for small-factor composites (does this even
|
||||
beat trial division for the tiny gap between 37 and our actual call
|
||||
rate?).
|
||||
|
||||
Specifically address:
|
||||
1. **Realistic safe-Rust floor for `is_prime_u64` worst-case on M-series.**
|
||||
Our measurement suggests ~880 ns (matching `num-prime`). Confirm or
|
||||
refute, with reasoning.
|
||||
2. **Recommended revised PRD target**, given that floor.
|
||||
3. **The `next_prime_u64(arbitrary)` 2.23 µs vs 2 µs gap** — is this
|
||||
meaningful or noise-band? If real, what closes it?
|
||||
|
||||
### Section C — Architectural review
|
||||
|
||||
1. Is the dual-path design (table + MR fallback) correctly capturing the
|
||||
workload of the five named consumers (shard router, HNSW buckets,
|
||||
sparsifier, mincut LSH, witness chain)? Any consumer where the table
|
||||
would mislead?
|
||||
2. Is `tests/table_cross_check.rs` sufficient as the source-of-truth gate,
|
||||
or is there a stronger invariant we should assert?
|
||||
3. Does `include!` of the kernel into both `build.rs` and `src/primality.rs`
|
||||
create any failure mode you have seen burn other projects?
|
||||
4. The `unstable-u128` feature uses Russian-peasant `mulmod_u128` and a
|
||||
tiny seeded LCG for witness selection. Is that sound for the
|
||||
probabilistic guarantee `Pr[err] < 2⁻⁸⁰` at 40 rounds?
|
||||
|
||||
### Section D — Validation methodology
|
||||
|
||||
For the *whole* Phase-0 deliverable, propose:
|
||||
1. The minimum set of CI gates that would catch a regression in either
|
||||
correctness or performance, and where they should run (PR / nightly /
|
||||
release).
|
||||
2. A reproducible benchmark harness that distinguishes signal from noise
|
||||
on contended hardware (criterion is fine; what statistical thresholds
|
||||
would you set for "pass"?).
|
||||
3. A property-test (proptest/quickcheck-style) strategy that would
|
||||
complement the pinned regressions in §2.3 without re-deriving MR.
|
||||
4. Anything you would add to the `tests/` or `benches/` directory before
|
||||
merging Phase 0.
|
||||
|
||||
---
|
||||
|
||||
## 5. Format of your response
|
||||
|
||||
Plain markdown. Sections A/B/C/D headed exactly as above. For Section B,
|
||||
use a table sorted by your ranking. End with a one-paragraph **Verdict**:
|
||||
should the PR merge as-is, merge with the PRD §6 row relaxed, or block
|
||||
on a specific change?
|
||||
|
||||
Do not be polite. If a proposal in our implementation is wrong, say so
|
||||
directly with line-numbered references into §2.1 / §2.2.
|
||||
113
docs/research/miller-rabin-optimizations/HANDOFF.md
Normal file
113
docs/research/miller-rabin-optimizations/HANDOFF.md
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
# Handoff — Phase 0 Kickoff (PIAL)
|
||||
|
||||
You are starting **Phase 0** of PIAL (Prime-Indexed Acceleration Layer):
|
||||
land the Miller-Rabin primality utility in `ruvector-collections` and
|
||||
nothing else. Five integration phases follow in separate PRs.
|
||||
|
||||
## Read first (in order)
|
||||
|
||||
1. **`docs/adr/ADR-151-miller-rabin-prime-optimizations.md`** — the binding
|
||||
decision (status, scope, acceptance criteria, alternatives rejected).
|
||||
2. **`docs/research/miller-rabin-optimizations/PRD.md`** — full design,
|
||||
five creative use-cases, performance targets, six-phase rollout, risks.
|
||||
3. **This file** — Phase 0 specifics. Do not skip.
|
||||
|
||||
## Branch
|
||||
|
||||
`feat/miller-rabin-prime-optimizations` (off `main`). Already created.
|
||||
|
||||
## Target crate
|
||||
|
||||
`crates/ruvector-collections/` already exists in the workspace. Today it
|
||||
contains `collection.rs`, `error.rs`, `lib.rs`, `manager.rs`. No
|
||||
`benches/` directory and no `build.rs` yet — both are Phase 0 work.
|
||||
|
||||
## Phase 0 Deliverables (four files, one PR)
|
||||
|
||||
| File | Purpose | Source of truth |
|
||||
|---|---|---|
|
||||
| `src/primality.rs` | Deterministic Miller-Rabin for u32/u64; probabilistic for u128; tabled `prev_prime_below_pow2` / `next_prime_above_pow2` fast paths; general `prev_prime_u64` / `next_prime_u64` MR-descent paths; `ephemeral_prime(seed)` for the witness chain | PRD §5 |
|
||||
| `build.rs` | Generate `PRIMES_BELOW_2K[57]` and `PRIMES_ABOVE_2K[57]` (k ∈ [8, 64]) from the MR implementation at compile time; emit as `${OUT_DIR}/prime_tables.rs` for `include!`-inclusion in `primality.rs` | ADR-151 "Generation Strategy" |
|
||||
| `benches/primality.rs` | Criterion benches: `is_prime_u64`, `prev_prime_below_pow2`, `next_prime_u64(arbitrary)`, `next_prime_u64(2^61)`. Targets in PRD §6 | PRD §6 |
|
||||
| `tests/table_cross_check.rs` | For every k ∈ [8, 64], assert `is_prime_u64(PRIMES_BELOW_2K[k-8])` is true and that no prime exists in `(PRIMES_BELOW_2K[k-8], 2^k)`. Same for `_ABOVE_`. This is the gate that makes MR the source of truth | ADR-151 acceptance #2 |
|
||||
|
||||
## Library wiring
|
||||
|
||||
Add `pub mod primality;` to `crates/ruvector-collections/src/lib.rs` and
|
||||
re-export the public API at the crate root. Update the crate-level
|
||||
doc-comment to mention the new module.
|
||||
|
||||
## Dependencies — explicitly do not add
|
||||
|
||||
The PRD rejects `num-prime`, `glass_pumpkin`, and any other external
|
||||
prime/big-integer crates. Use **only** `core` integer arithmetic.
|
||||
Add `criterion` under `[dev-dependencies]` for benches if it is not
|
||||
already inherited via the workspace.
|
||||
|
||||
## Witnesses (the whole correctness story in three lines)
|
||||
|
||||
- `u32`: `{ 2, 7, 61 }` — Pomerance/Selfridge/Wagstaff. Deterministic.
|
||||
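- `u64`: `{ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37 }` — the first twelve primes (called "Sinclair-12" in these docs). Deterministic for all `u64`: the smallest strong pseudoprime to all twelve bases is ≈ 3.19 × 10²³ > 2⁶⁴ (Sorenson & Webster). Note that Sinclair's own 2011 set is a different, 7-base set.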
|
||||
- `u128`: 40 random rounds, **only** behind `--features unstable-u128`. Probabilistic, error < 2⁻⁸⁰.
|
||||
|
||||
## Pinned pseudoprime regressions
|
||||
|
||||
Include these in `tests/primality_pseudoprimes.rs` so future witness-set
|
||||
"optimizations" cannot silently regress correctness:
|
||||
|
||||
- `3_215_031_751` — strong pseudoprime to bases {2, 3, 5, 7} (must be detected by Sinclair-12).
|
||||
- `2_152_302_898_747` — strong pseudoprime to {2, 3, 5, 7, 11}.
|
||||
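- `3_825_123_056_546_413_051` — strong pseudoprime to the first eleven prime bases {2, 3, …, 31} (OEIS A014233(11)); only base 37 detects it, so it is the canary for anyone shrinking the 12-witness set.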
|
||||
|
||||
Add small-integer sanity checks (every n in [0, 100], primes and composites alike) and edge cases
|
||||
(0, 1, `u64::MAX`, `u64::MAX - 58` which is the largest u64 prime).
|
||||
|
||||
## Performance targets (from PRD §6)
|
||||
|
||||
| Operation | M-series | WASM |
|
||||
|---|---|---|
|
||||
| `is_prime_u64` worst-case | ≤ 50 ns | ≤ 200 ns |
|
||||
| `prev_prime_below_pow2(k)` (table) | ≤ 1 ns | ≤ 2 ns |
|
||||
| `next_prime_u64(2^32)` (table) | ≤ 1 ns | ≤ 2 ns |
|
||||
| `next_prime_u64(arbitrary N)` (general MR) | ≤ 2 µs | ≤ 8 µs |
|
||||
| `next_prime_u64(2^61 − 1)` (general MR; `2^61` itself is a power of two and would hit the table) | ≤ 12 µs | ≤ 40 µs |
|
||||
|
||||
## Phase 0 is "Done" when
|
||||
|
||||
ADR-151 acceptance criteria #1, #2, #3 are all green:
|
||||
|
||||
1. `cargo test -p ruvector-collections primality` passes (includes pinned pseudoprimes).
|
||||
2. `cargo test -p ruvector-collections primality::table_cross_check` validates all 114 table entries against MR.
|
||||
3. `cargo bench -p ruvector-collections primality` meets the targets above on M-series.
|
||||
|
||||
**Do not start Phase 1 in this PR.** Phases ship as separate PRs
|
||||
(PRD §7). Keep this one tightly scoped to the utility itself.
|
||||
|
||||
## First commands in the new session
|
||||
|
||||
```bash
|
||||
# Confirm you are on the right branch
|
||||
git status # should show "On branch feat/miller-rabin-prime-optimizations" with no changes
|
||||
|
||||
# Baseline — confirm the crate compiles before you touch it
|
||||
cargo check -p ruvector-collections
|
||||
|
||||
# Re-read the binding documents
|
||||
cat docs/adr/ADR-151-miller-rabin-prime-optimizations.md | head -80
|
||||
cat docs/research/miller-rabin-optimizations/PRD.md | sed -n '150,260p' # §5 API + §6 perf
|
||||
```
|
||||
|
||||
Then start with `crates/ruvector-collections/src/primality.rs`. The
|
||||
deterministic u64 Miller-Rabin is ~80 lines including comments;
|
||||
everything else (tables via `build.rs`, benches, cross-check test)
|
||||
follows mechanically from it.
|
||||
|
||||
## What is explicitly **not** Phase 0
|
||||
|
||||
- Editing `crates/ruvector-graph/` (that's Phase 1).
|
||||
- Editing any HNSW crate (Phase 2).
|
||||
- Editing sparsifier or attn-mincut (Phase 3).
|
||||
- Editing `crates/mcp-brain-server/` or pi-brain payloads (Phase 4).
|
||||
- Editing CNN / quantization codebooks (Phase 5).
|
||||
|
||||
If you find yourself touching any of those, stop and split the PR.
|
||||
369
docs/research/miller-rabin-optimizations/PRD.md
Normal file
369
docs/research/miller-rabin-optimizations/PRD.md
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
# PRD: Prime-Indexed Acceleration Layer (PIAL)
|
||||
|
||||
> Creative Miller-Rabin–driven optimizations for ruvector's hashing,
|
||||
> sharding, sketching, and witness-chain layers.
|
||||
|
||||
| Field | Value |
|
||||
|--------------------|------------------------------------------------------|
|
||||
| **Status** | Draft |
|
||||
| **Date** | 2026-04-16 |
|
||||
| **Owner** | RuVector Core / Architecture |
|
||||
| **Related ADR** | ADR-151 (this PRD's binding decision record) |
|
||||
| **Cross-refs** | ADR-027 (HNSW), ADR-038 (witness), ADR-058 (hash), |
|
||||
| | ADR-148/149 (brain perf), ADR-150 (π-brain) |
|
||||
| **Tier (ADR-026)** | T1 (Agent Booster eligible) for the core utility; |
|
||||
| | T2 (Haiku) for the integration patches. |
|
||||
|
||||
---
|
||||
|
||||
## 1. Background
|
||||
|
||||
Three years of incremental work have left ruvector with several places where
|
||||
**arithmetic on indices, hashes, and shard keys defaults to power-of-two
|
||||
moduli** — convenient on hardware (`x & (N - 1)`), pathological on real data:
|
||||
|
||||
| Site | Current modulus | Failure mode |
|
||||
|---------------------------------------------------|--------------------|------------------------------------------------------------|
|
||||
| `ruvector-graph` shard router (ADR-058 #6) | `xxh3_64() mod 2^k`| ~50% collision @ 2³² nodes; biased on Zipfian keys |
|
||||
| `micro-hnsw-wasm` adjacency map | open-addressed 2^k | clustering on near-duplicate vectors (e.g. timestamps) |
|
||||
| `ruvector-sparsifier` stride sampler | power-of-2 stride | aliasing on lattice / image-grid graphs |
|
||||
| `ruvector-attn-mincut` LSH sketch | ad-hoc constant | breaks 2-independence of universal hash family |
|
||||
| pi-brain witness fingerprint (ADR-038) | XXH3 only | single-hash tamper risk; no per-share entropy |
|
||||
|
||||
The fix in every one of these is **the same primitive**: a fast, deterministic
|
||||
primality test that lets us mint a prime *near a target size* on demand.
|
||||
|
||||
We choose **Miller-Rabin** because it is:
|
||||
|
||||
- **Deterministic** for all `u64` inputs with the Sinclair witness set
|
||||
`{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}` — no probabilistic guarantees
|
||||
needed for our hot paths.
|
||||
- **O(k · log³ n)** — a `next_prime(2^32)` call costs ~2 µs in benchmarks;
|
||||
amortized to zero against shard-rebalance cycles.
|
||||
- **WASM-friendly** — pure integer arithmetic, no FFI, fits in <1 KB compiled.
|
||||
- **Tier-1 eligible** under ADR-026 — pure transform work, no LLM in the loop.
|
||||
|
||||
This PRD frames a single, surgically scoped utility (`primality.rs`) that
|
||||
*unblocks* a portfolio of creative optimizations across the workspace. The
|
||||
binding architectural commitments live in ADR-151.
|
||||
|
||||
---
|
||||
|
||||
## 2. Goals
|
||||
|
||||
| # | Goal | Metric / Acceptance |
|
||||
|---|----------------------------------------------------------------------|------------------------------------------------------|
|
||||
| G1| Provide `is_prime`, `next_prime`, `prev_prime` over `u32`/`u64` | Deterministic, ≥ 200 M ops/s on M-series |
|
||||
| G2| Re-shard ruvector-graph by prime modulus | ≥ 30% reduction in shard-load std-dev on Zipfian load|
|
||||
| G3| Convert HNSW adjacency tables to prime-bucket open addressing | ≥ 15% drop in p99 insert latency at 1 M vectors |
|
||||
| G4| Replace LSH stride/modulus constants with certified primes | Restore 2-independence; pass property tests |
|
||||
| G5| Add per-share ephemeral prime fingerprint to π-brain witness chain | +8 bytes/share; published in `brain_share` payload |
|
||||
| G6| Cross-target: the utility compiles for native, WASM, and `no_std` | Single crate, no feature-flag explosion |
|
||||
|
||||
## 3. Non-Goals
|
||||
|
||||
- **No cryptographic key generation.** Miller-Rabin alone is *not* a substitute
|
||||
for proven-prime generation in RSA/ECC; we only use it for hashing/sharding.
|
||||
- **No new heap allocations** in the inner loop — the utility must be
|
||||
allocation-free past the (constant-size) witness array.
|
||||
- **No replacement** of `prime-radiant` (which is a coherence-gate crate and
|
||||
unrelated despite the name collision).
|
||||
- **No big-integer support.** 64-bit (and an opt-in `u128` mode) is enough for
|
||||
every ruvector use case identified above.
|
||||
- **No SHAKE/HMAC redesign.** ADR-058's other findings stand independently.
|
||||
|
||||
---
|
||||
|
||||
## 4. Creative Use-Cases (the "why this is interesting")
|
||||
|
||||
### 4.1 Prime-Modulus Shard Routing — *direct fix for ADR-058 #6*
|
||||
|
||||
Today's shard router is `xxh3_64(node_id) & (shards - 1)`. The mask discards
|
||||
all but `log₂(shards)` bits of entropy, which is exactly when adversarial /
|
||||
Zipfian inputs cluster. Replacing it with `xxh3_64(node_id) % p`, where
|
||||
`p = prev_prime(shards)`, recovers full entropy and gives provably balanced
|
||||
buckets under universal hashing.
|
||||
|
||||
> **Creative twist:** because `prev_prime(k)` is cheap, we can *adapt* the
|
||||
> modulus during a rolling re-shard (every N minutes) — the cluster never
|
||||
> sees a power-of-two pathology because the modulus literally never *is* a
|
||||
> power of two for two consecutive epochs.
|
||||
|
||||
### 4.2 Prime-Bucket HNSW Adjacency
|
||||
|
||||
`micro-hnsw-wasm` and `ruvector-hyperbolic-hnsw` store edges in open-addressed
|
||||
tables sized to the next power of two. Probe-sequence collisions on
|
||||
near-duplicate vectors (e.g. real-time sensor or timestamp embeddings) blow up
|
||||
p99 insert latency. Switching to `prev_prime(2^k)` capacity with linear or
|
||||
quadratic probing keeps the table size cache-friendly while breaking the
|
||||
power-of-two clustering.
|
||||
|
||||
### 4.3 Certified Modulus for Universal LSH
|
||||
|
||||
Several sketch modules (`ruvector-attn-mincut`, sparsifier samplers) build
|
||||
hash families of the form `((a · x + b) mod p) mod m`. The 2-independence
|
||||
guarantee *requires* `p` to be prime and `> universe_size`. Today these are
|
||||
hand-picked Mersenne-shaped constants (`2^61 − 1`, `2^31 − 1`); when the
|
||||
universe grows past those bounds the family silently degrades. Miller-Rabin
|
||||
lets us call `next_prime(universe_size)` on dataset load and store the chosen
|
||||
modulus alongside the index.
|
||||
|
||||
### 4.4 Witness-Chain Ephemeral Primes (π-brain)
|
||||
|
||||
The pi-brain witness chain (ADR-038, CLAUDE.md "Witness Chain Rules")
|
||||
currently fingerprints each shared memory with XXH3 only. We propose:
|
||||
|
||||
```text
|
||||
share = { payload, fingerprint_xxh3, ephemeral_prime q, fingerprint_modq }
|
||||
where q = next_prime( seed = SHA256(payload)[0..8] )
|
||||
```
|
||||
|
||||
A tampering peer attempting to substitute payloads must collide *both*
|
||||
fingerprints — including a hash modulo a prime `q` they cannot precompute,
|
||||
because `q` is derived per-share. Cost: 8 bytes on the wire, ~2 µs at the
|
||||
sender, ~50 ns at every verifier. The asymmetry is the point.
|
||||
|
||||
### 4.5 Anti-Aliasing Stride for Sparsifier Sampling
|
||||
|
||||
Spectral sparsifiers in `ruvector-sparsifier` use stride-based subsampling
|
||||
when the importance sketch is too expensive. Power-of-two strides alias
|
||||
brutally on grid-structured graphs (image, mesh, lattice). A prime stride
|
||||
breaks the alignment for the same reason linear-congruential generators
|
||||
demand prime moduli — borrowed wisdom, decades old, free to reuse.
|
||||
|
||||
### 4.6 Bonus: Prime-Sized Quantization Codebooks
|
||||
|
||||
Product-quantization codebooks (used by ruvector-cnn-wasm and ruQu) sized to
|
||||
prime cardinalities show measurably better recall@k on standard benchmarks
|
||||
than power-of-two codebooks because they break the implicit "code-of-codes"
|
||||
correlation across sub-spaces. This is an opt-in mode, not a default.
|
||||
|
||||
---
|
||||
|
||||
## 5. Proposed Architecture
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────────────────────────────┐
|
||||
│ crates/ruvector-collections/src/primality.rs (new, ~250 LoC) │
|
||||
│ │
|
||||
│ pub fn is_prime_u32(n: u32) -> bool // {2,7,61} │
|
||||
│ pub fn is_prime_u64(n: u64) -> bool // Sinclair-12 │
|
||||
│ pub fn is_prime_u128(n: u128, k: u8) -> bool // probabilistic│
|
||||
│ pub fn next_prime_u64(n: u64) -> u64 │
|
||||
│ pub fn prev_prime_u64(n: u64) -> u64 │
|
||||
│ pub fn ephemeral_prime(seed: u64) -> u64 // for §4.4 │
|
||||
│ │
|
||||
│ #[cfg(target_arch = "wasm32")] // shares same impl │
|
||||
└──────────────────┬───────────────────────────┬────────────────┘
|
||||
│ │
|
||||
┌──────────┴──────────┐ ┌─────────┴───────────┐
|
||||
▼ ▼ ▼ ▼
|
||||
shard router HNSW buckets LSH families witness chain
|
||||
(ruvector-graph) (micro-hnsw) (sparsifier, (mcp-brain-server,
|
||||
attn-mincut) pi-brain)
|
||||
```
|
||||
|
||||
### Why `ruvector-collections`?
|
||||
|
||||
- It already houses cross-cutting data-structure utilities.
|
||||
- All five consumers depend on it transitively, so no new edges in the
|
||||
dependency graph.
|
||||
- Keeps the workspace top-level crate count flat (we have 60+ already).
|
||||
|
||||
### Public API (sketch)
|
||||
|
||||
```rust
|
||||
//! crates/ruvector-collections/src/primality.rs
|
||||
//!
|
||||
//! Deterministic Miller-Rabin primality for u32/u64 and probabilistic
|
||||
//! Miller-Rabin for u128. Allocation-free, no_std-friendly.
|
||||
//!
|
||||
//! Hot-path strategy: tabled primes for the common power-of-two-aligned
|
||||
//! sizes (zero runtime cost), Miller-Rabin descent as the general fallback.
|
||||
|
||||
#[inline]
|
||||
pub const fn is_prime_u32(n: u32) -> bool { /* witnesses: 2, 7, 61 */ }
|
||||
|
||||
#[inline]
|
||||
pub const fn is_prime_u64(n: u64) -> bool {
|
||||
// Sinclair (2011): deterministic for all u64
|
||||
// witnesses: 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37
|
||||
}
|
||||
|
||||
pub fn is_prime_u128(n: u128, rounds: u8) -> bool { /* probabilistic */ }
|
||||
|
||||
// ── Generation: dual-path ────────────────────────────────────────────
|
||||
//
|
||||
// Fast path: lookup table for "largest prime < 2^k", k ∈ [8, 64].
|
||||
// CI validates every entry against the Miller-Rabin descent at build
|
||||
// time, so the table is never the source of truth — MR is.
|
||||
const PRIMES_BELOW_2K: [u64; 57] = [
|
||||
251, // < 2^8
|
||||
509, // < 2^9
|
||||
1021, // < 2^10
|
||||
// ... entries for k = 11..=31 ...
|
||||
4_294_967_291, // < 2^32 (shard-router common case)
|
||||
// ... entries for k = 33..=63 ...
|
||||
18_446_744_073_709_551_557, // < 2^64
|
||||
];
|
||||
|
||||
#[inline]
|
||||
pub const fn prev_prime_below_pow2(k: u32) -> u64 {
|
||||
debug_assert!(k >= 8 && k <= 64);
|
||||
PRIMES_BELOW_2K[(k - 8) as usize]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn prev_prime_u64(n: u64) -> u64 {
|
||||
// Fast path: power-of-two-aligned inputs (HNSW buckets, shard sizes)
|
||||
if n.is_power_of_two() && n.trailing_zeros() >= 8 {
|
||||
return prev_prime_below_pow2(n.trailing_zeros());
|
||||
}
|
||||
// General path: 6k±1 wheel + Miller-Rabin descent
|
||||
miller_rabin_descent(n, Direction::Down)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn next_prime_u64(n: u64) -> u64 {
|
||||
if n.is_power_of_two() && n.trailing_zeros() >= 8 {
|
||||
// Symmetric optional fast path: PRIMES_ABOVE_2K table
|
||||
return next_prime_above_pow2(n.trailing_zeros());
|
||||
}
|
||||
miller_rabin_descent(n, Direction::Up)
|
||||
}
|
||||
|
||||
pub fn ephemeral_prime(seed: u64) -> u64 {
|
||||
// seed → next_prime((seed | 1) % 2^61) — used by witness chain (§4.4)
|
||||
// No table — input is unpredictable by design.
|
||||
}
|
||||
```
|
||||
|
||||
### Why the dual-path matters
|
||||
|
||||
Three of PIAL's five generation sites (shard router, HNSW bucket sizing,
|
||||
sparsifier strides) ask for primes near *fixed* sizes that never change
|
||||
between releases. The table converts those calls into a single L1-cached
|
||||
load — no Miller-Rabin work at runtime at all.
|
||||
|
||||
The two unpredictable sites (LSH universe, witness-chain ephemeral primes)
|
||||
fall through to the general MR path. They're cold paths anyway —
|
||||
microsecond-scale generation cost is invisible against the surrounding work.
|
||||
|
||||
**Crucially, MR is still the source of truth.** A `build.rs` script
|
||||
regenerates `PRIMES_BELOW_2K` and `PRIMES_ABOVE_2K` from the MR
|
||||
implementation on every build, and a `#[test]` cross-checks every entry
|
||||
under `cargo test`. The table is an *amortization*, not a substitute.
|
||||
|
||||
| Generation site | Path taken | Runtime cost |
|
||||
|-----------------------------|--------------------|--------------|
|
||||
| Shard router (`prev_prime(2^k)`) | Fast (table) | ~1 ns |
|
||||
| HNSW bucket (`prev_prime(2^k)`) | Fast (table) | ~1 ns |
|
||||
| Sparsifier stride (table-friendly)| Fast (table) | ~1 ns |
|
||||
| LSH modulus (`next_prime(N)`)    | General (MR)       | ~2 µs (Phase 0 measured, §6.1) |
|
||||
| Witness ephemeral (`next_prime(seed)`)| General (MR)    | up to ~11 µs near 2^61 (Phase 0 measured, §6.1) |
|
||||
|
||||
---
|
||||
|
||||
## 6. Performance Targets
|
||||
|
||||
> **Revised 2026-04-16 (Phase 0).** The original `is_prime_u64` worst-case
|
||||
> target of 50 ns was found to be unachievable in pure safe Rust;
|
||||
> `num-prime` itself measures ~880 ns on the same hardware. Target relaxed
|
||||
> to track the empirical safe-Rust ceiling. See §6.1 and the Phase 0
|
||||
> Findings section of ADR-151 for the full justification.
|
||||
|
||||
| Operation | Target (M-series) | Target (WASM) |
|
||||
|------------------------------------------------|---------------------|--------------------|
|
||||
| `is_prime_u64(p)` (worst-case) | **≤ 1 µs** *(was 50 ns)* | **≤ 4 µs** *(was 200 ns)* |
|
||||
| `prev_prime_below_pow2(k)` (table fast path) | **≤ 1 ns** | **≤ 2 ns** |
|
||||
| `next_prime_u64(2^32)` (table fast path) | **≤ 1 ns** | **≤ 2 ns** |
|
||||
| `next_prime_u64(arbitrary N)` (general MR path)| ≤ 2 µs | ≤ 8 µs |
|
||||
| `next_prime_u64(2^61 − 1)` (general MR path)   | ≤ 12 µs             | ≤ 40 µs            |
|
||||
| Shard re-route on 1 M nodes | ≤ 30 ms (one-shot) | n/a |
|
||||
| HNSW p99 insert @ 1 M vectors | -15% vs baseline | -10% vs baseline |
|
||||
| WASM bundle growth from `PRIMES_BELOW_2K`+`_ABOVE_2K` | n/a | ≤ 1 KB total |
|
||||
|
||||
Benchmarks live in `crates/ruvector-collections/benches/primality.rs` and run
|
||||
under the existing `npm run bench` infrastructure.
|
||||
|
||||
### 6.1 Empirical findings (Phase 0)
|
||||
|
||||
Phase 0 measurements on M-series, criterion release profile:
|
||||
|
||||
| Bench | Measured | Revised target | Status |
|
||||
|--------------------------------------------|-----------|----------------|--------|
|
||||
| `prev_prime_below_pow2(32)` | 552 ps | ≤ 1 ns | met |
|
||||
| `next_prime_u64(2^61 − 1)` | 10.97 µs | ≤ 12 µs | met |
|
||||
| `next_prime_u64(arbitrary ≈ 1e9)`          | 2.23 µs   | ≤ 2 µs         | misses target by ~11% |
|
||||
| `is_prime_u64(u64::MAX − 58)` worst-case | 15.24 µs | ≤ 1 µs | does not meet revised target — Phase 0.1 |
|
||||
|
||||
A throwaway scratch crate compiling a verbatim copy of our kernel
|
||||
alongside `num-prime` 0.4.4 in the same binary on the same input
|
||||
measured **ours = 15.63 µs, num-prime = 884 ns** (criterion sanity no-op
|
||||
= 467 ps confirms harness honesty). The 17.7× gap is recoverable in pure
|
||||
safe Rust by porting Montgomery-form modular multiplication into
|
||||
`mr_mulmod_u64` / `mr_powmod_u64` (~80 LoC). That is Phase 0.1 scope and
|
||||
ships in a separate PR; see ADR-151 "Phase 0 Findings" for the full plan
|
||||
and the explicit rejection of the empirical 7-witness "Sinclair" set as
|
||||
a correctness regression dressed as a perf win.
|
||||
|
||||
---
|
||||
|
||||
## 7. Rollout Plan
|
||||
|
||||
| Phase | Scope | Gate |
|
||||
|-------|-------------------------------------------------------------------------|--------------------------------------------|
|
||||
| **0** | Land `primality.rs` + tests + benches in `ruvector-collections` | `npm test && npm run lint` green |
|
||||
| **1** | Wire `next_prime` into ruvector-graph shard router behind feature flag | A/B Zipfian load; ≥ 30% std-dev reduction |
|
||||
| **2** | Convert HNSW adjacency to prime buckets (micro-hnsw-wasm first) | recall@k unchanged; p99 insert -15% |
|
||||
| **3** | Switch sparsifier + attn-mincut LSH families to certified primes | property tests pass; no regression in cuts |
|
||||
| **4** | Ship ephemeral-prime fingerprint in pi-brain witness payload (opt-in) | `brain_share` accepts new field; verifiers |
|
||||
| | | tolerant of absence (backward compatible) |
|
||||
| **5** | Optional: prime-sized PQ codebooks in ruvector-cnn-wasm | recall@10 ≥ baseline on SIFT-1M |
|
||||
|
||||
Each phase is a separate PR; no big-bang merge.
|
||||
|
||||
---
|
||||
|
||||
## 8. Risks & Mitigations
|
||||
|
||||
| Risk | Mitigation |
|
||||
|-----------------------------------------------------------------|----------------------------------------------------------------|
|
||||
| Modulo-by-prime is a *division*, slower than mask | Use Lemire's `fastmod` (one mul + one shift) — already in tree |
|
||||
| Sinclair witness set has subtle bugs in edge cases (n < 9) | Hard-code small-prime fast path + 100% branch coverage tests |
|
||||
| WASM `u128` codegen is ~5× slower than native | u128 mode is opt-in; default paths are u64 |
|
||||
| Cluster mid-flight reshard exposes intermediate state           | Phase 1 ships behind `--features prime-shard`; rollout is gated |
|
||||
| Witness-chain change breaks older pi-brain peers | New field is `Option<…>`; verifiers ignore-on-absent |
|
||||
| "Yet another collections crate" sprawl | All work lives in *existing* `ruvector-collections` |
|
||||
|
||||
---
|
||||
|
||||
## 9. Open Questions
|
||||
|
||||
1. Should `next_prime_u64` accept a *budget* (max-distance) and return
|
||||
`Option<u64>` instead of looping unbounded? (Probably yes.)
|
||||
2. Do we want a `PrimeModHash<H>` newtype wrapper that auto-applies fastmod,
|
||||
or expose `prev_prime` and let callers compose? (Lean: wrapper.)
|
||||
3. Does the witness-chain ephemeral prime need to be authenticated under the
|
||||
sender's key, or is per-share derivation from `SHA256(payload)` enough?
|
||||
(Defer to security review during Phase 4.)
|
||||
|
||||
---
|
||||
|
||||
## 10. Out of Scope (deliberately)
|
||||
|
||||
- Big-integer / arbitrary-precision Miller-Rabin (use `num-bigint` if ever
|
||||
needed — not on the roadmap).
|
||||
- Replacing XXH3 as ruvector's primary hash (ADR-058's job).
|
||||
- Strong-pseudoprime-based Lucas certificates (YAGNI for hashing).
|
||||
- Distributed prime-generation protocols (we mint locally, deterministically).
|
||||
|
||||
---
|
||||
|
||||
## 11. Approval Checklist
|
||||
|
||||
- [ ] Architecture review (links ADR-151)
|
||||
- [ ] Security review (esp. §4.4 witness chain)
|
||||
- [ ] Performance baseline captured for shard-router and HNSW p99
|
||||
- [ ] WASM size budget verified (`micro-hnsw-wasm` < +2 KB)
|
||||
- [ ] Documentation: README in `ruvector-collections` references new module
|
||||
Loading…
Add table
Add a link
Reference in a new issue