mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-27 00:25:10 +00:00
RabitqPlusIndex::from_vectors_parallel rotates + bit-packs every vector in parallel using rayon, then commits the SoA serially. Produces a bit-identical index to the serial add loop — rotation matrix is seeded once at construction and encode is deterministic, so parallel ordering cannot affect output bytes. VectorCache::prime picks between serial add() and the new parallel constructor based on batch size (PARALLEL_PRIME_THRESHOLD = 1024). Below 1k vectors the rayon task-queue overhead outweighs the D×D rotation savings; above it the parallel path dominates. Measured (clustered D=128, rerank×20): n=5k prime 22.3 ms → 4.5 ms (4.9×) n=50k prime 213 ms → 19.6 ms (10.9×) n=100k prime 420 ms → 37.6 ms (11.2×) This is the biggest cold-start-latency win available in M1. Real backend deployments where prime cost is the critical-path latency on a cache miss now see p99 drop by an order of magnitude. rayon dep is no longer feature-gated in rabitq (it's already a runtime dep via the workspace-pinned 1.10 that ruLake uses). 40 tests passing. Clippy -D warnings clean. Co-Authored-By: claude-flow <ruv@ruv.net>
28 lines
688 B
TOML
28 lines
688 B
TOML
[package]
|
|
name = "ruvector-rabitq"
|
|
version.workspace = true
|
|
edition.workspace = true
|
|
rust-version.workspace = true
|
|
license.workspace = true
|
|
authors.workspace = true
|
|
repository.workspace = true
|
|
description = "RaBitQ: rotation-based 1-bit quantization for ultra-fast approximate nearest-neighbor search with theoretical error bounds"
|
|
|
|
[[bin]]
|
|
name = "rabitq-demo"
|
|
path = "src/main.rs"
|
|
|
|
[[bench]]
|
|
name = "rabitq_bench"
|
|
harness = false
|
|
|
|
[dependencies]
|
|
rand = { workspace = true }
|
|
rand_distr = { workspace = true }
|
|
rayon = { workspace = true }
|
|
serde = { workspace = true }
|
|
serde_json = { workspace = true }
|
|
thiserror = { workspace = true }
|
|
|
|
[dev-dependencies]
|
|
criterion = { workspace = true }
|