ruvector/Cargo.toml
ruvnet 3a1afa2284 feat(rulake): vector-native federation intermediary — ADR-155 + MVP crate
Implements the M1 scope of docs/research/ruLake/ as an intermediary that
fans out vector queries across heterogeneous backends (Parquet, BigQuery,
Snowflake, Delta, Iceberg, local) behind a single RVF wire protocol, with
a RaBitQ-compressed cache in front.

## What ships

- **Research docs** under docs/research/ruLake/ (9 files, ~2.5k lines),
  reframed from the earlier "plug RVF into BigQuery" shape to the
  intermediary/federation shape. BigQuery-native compute becomes a Tier-2
  push-down optimization inside the BigQueryBackend adapter, not a new
  product shape.
- **ADR-155 v2** as "Proposed" — captures the seven alternatives
  considered (plug-in-per-lake, standalone vector DB, Iceberg extension,
  Trino connector, JVM intermediary, notebook-only, push-through-only),
  consequences, and eight open questions.
- **crates/ruvector-rulake/** — new workspace member:
  - `BackendAdapter` trait with minimum surface (id / list_collections /
    pull_vectors / generation / supports_pushdown).
  - `LocalBackend` in-memory reference implementation (thread-safe).
  - `VectorCache` wrapping ruvector_rabitq::RabitqPlusIndex, with per-
    collection generation tracking and `Consistency::{Fresh, Eventual}`
    policies.
  - `RuLake` entry point: register backends, search single or federated,
    cache-stats introspection.
  - 7 smoke tests (`tests/federation_smoke.rs`): byte-exact match vs
    direct RaBitQ, cache-coherence after backend mutation, cross-backend
    fan-out with correct score ordering, cache-hit-faster-than-miss,
    three error-path tests.
  - `rulake-demo` bin: unified benchmark producing the same-run table in
    BENCHMARK.md.

## Measured numbers (LocalBackend, D=128, rerank×20, 300 queries)

| n       | direct RaBitQ+ QPS | ruLake Fresh QPS | ruLake Eventual QPS | tax   |
|--------:|-------------------:|-----------------:|--------------------:|------:|
|   5,000 |             17,311 |           17,874 |              17,858 | 0.97× |
|  50,000 |              5,162 |            5,123 |               5,050 | 1.01× |
| 100,000 |              3,122 |            3,117 |               3,114 | 1.00× |

**Intermediary tax is effectively zero on a local backend.** Federated
across 2 shards: 2,470 QPS @ n=100k (0.79× of single-shard); 4 shards:
1,781 QPS (0.57×) — sequential fan-out, parallel merge is the v2
optimisation per ADR-155 §Consequences.

## Build + test status (this crate only)

```
cargo build  -p ruvector-rulake --release                            ✓
cargo test   -p ruvector-rulake --release                            ✓ 7 passed
cargo clippy -p ruvector-rulake --release --all-targets -- -D warnings   ✓ clean
cargo fmt    -p ruvector-rulake -- --check                           ✓ clean
cargo run    -p ruvector-rulake --release --bin rulake-demo          ✓ reproduces BENCHMARK.md
```

## Scope this commit does NOT cover (M2-M5, see 07-implementation-plan.md)

- ParquetBackend, BigQueryBackend, SnowflakeBackend, IcebergBackend,
  DeltaBackend (real-backend adapters).
- Push-down paths into backends with native vector ops.
- Governance / RBAC / PII / lineage / audit (M4).
- SIFT1M recall measurement on the real-backend path.
- Parallel fan-out via rayon.
- LRU cache eviction.

Co-Authored-By: claude-flow <ruv@ruv.net>
2026-04-23 18:38:49 -04:00

294 lines
9.7 KiB
TOML

[workspace]
exclude = ["crates/micro-hnsw-wasm", "crates/ruvector-hyperbolic-hnsw", "crates/ruvector-hyperbolic-hnsw-wasm", "examples/ruvLLM/esp32", "examples/ruvLLM/esp32-flash", "examples/edge-net", "examples/data", "examples/ruvLLM", "examples/delta-behavior", "crates/rvf", "crates/rvf/*", "crates/rvf/*/*", "examples/rvf-desktop", "crates/mcp-brain-server"]
members = [
"crates/ruvector-rabitq",
"crates/ruvector-rulake",
"crates/ruvector-core",
"crates/ruvector-node",
"crates/ruvector-wasm",
"crates/ruvector-cli",
"crates/ruvector-bench",
"crates/ruvector-metrics",
"crates/ruvector-filter",
"crates/ruvector-router-core",
"crates/ruvector-router-cli",
"crates/ruvector-router-ffi",
"crates/ruvector-router-wasm",
"crates/ruvector-server",
"crates/ruvector-snapshot",
"crates/ruvector-tiny-dancer-core",
"crates/ruvector-tiny-dancer-wasm",
"crates/ruvector-tiny-dancer-node",
"crates/ruvector-collections",
"crates/ruvector-cluster",
"crates/ruvector-raft",
"crates/ruvector-replication",
"crates/ruvector-graph",
"crates/ruvector-graph-node",
"crates/ruvector-graph-wasm",
"crates/ruvector-gnn",
"crates/ruvector-gnn-node",
"crates/ruvector-gnn-wasm",
"crates/ruvector-attention",
"crates/ruvector-attention-wasm",
"crates/ruvector-attention-node",
"crates/ruvector-cnn",
"crates/ruvector-cnn-wasm",
"crates/ruvector-mincut",
"crates/ruvector-mincut-wasm",
"crates/ruvector-mincut-node",
"crates/ruvector-mincut-gated-transformer",
"crates/ruvector-mincut-gated-transformer-wasm",
"crates/ruvector-postgres",
"crates/ruvector-nervous-system",
"examples/refrag-pipeline",
"examples/scipix",
"examples/google-cloud",
"examples/subpolynomial-time",
"crates/sona",
"crates/rvlite",
"crates/ruvector-nervous-system",
"crates/ruvector-dag",
"crates/ruvector-dag-wasm",
"crates/ruvector-nervous-system-wasm",
"crates/ruvector-economy-wasm",
"crates/ruvector-learning-wasm",
"crates/ruvector-exotic-wasm",
"crates/ruvector-attention-unified-wasm",
"crates/ruvector-fpga-transformer",
"crates/ruvector-fpga-transformer-wasm",
"crates/ruvector-sparse-inference",
"crates/ruvector-math",
"crates/ruvector-math-wasm",
"examples/benchmarks",
"crates/cognitum-gate-kernel",
"crates/cognitum-gate-tilezero",
"crates/mcp-gate",
"crates/mcp-brain",
"crates/mcp-brain-server",
"crates/ruQu",
"crates/ruvllm",
"crates/ruvllm-cli",
"crates/ruvllm-wasm",
"crates/prime-radiant",
"crates/ruvector-delta-core",
"crates/ruvector-delta-wasm",
"crates/ruvector-delta-index",
"crates/ruvector-delta-graph",
"crates/ruvector-delta-consensus",
"crates/ruvector-crv",
"crates/ruvector-temporal-tensor",
"crates/ruqu-core",
"crates/ruqu-algorithms",
"crates/ruqu-wasm",
"crates/ruqu-exotic",
"crates/ruvector-domain-expansion",
"crates/ruvector-domain-expansion-wasm",
"crates/ruvector-solver",
"crates/ruvector-solver-wasm",
"crates/ruvector-solver-node",
"examples/dna",
"examples/OSpipe",
"crates/ruvector-coherence",
"crates/ruvector-profiler",
"crates/ruvector-attn-mincut",
"crates/ruvector-cognitive-container",
"crates/ruvector-verified",
"crates/ruvector-verified-wasm",
"crates/ruvector-graph-transformer",
"crates/ruvector-graph-transformer-wasm",
"crates/ruvector-graph-transformer-node",
"examples/rvf-kernel-optimized",
"examples/verified-applications",
"crates/thermorust",
"crates/ruvector-dither",
"crates/ruvector-robotics",
"examples/robotics",
"crates/neural-trader-core",
"crates/neural-trader-coherence",
"crates/neural-trader-replay",
"crates/neural-trader-wasm",
# Kalshi integration (ADR-153)
"crates/ruvector-kalshi",
"crates/neural-trader-strategies",
# RuVix Cognition Kernel (organized under crates/ruvix/)
"crates/ruvix/crates/types",
"crates/ruvix/crates/region",
"crates/ruvix/crates/queue",
"crates/ruvix/crates/cap",
"crates/ruvix/crates/proof",
"crates/ruvix/crates/sched",
"crates/ruvix/crates/boot",
"crates/ruvix/crates/vecgraph",
"crates/ruvix/crates/nucleus",
# Phase B: Bare metal AArch64 support
"crates/ruvix/crates/hal",
"crates/ruvix/crates/aarch64",
"crates/ruvix/crates/drivers",
"crates/ruvix/tests",
"crates/ruvix/benches",
"crates/ruvix/examples/cognitive_demo",
# rvAgent — AI Agent Framework (DeepAgents Rust conversion)
"crates/rvAgent/rvagent-core",
"crates/rvAgent/rvagent-backends",
"crates/rvAgent/rvagent-middleware",
"crates/rvAgent/rvagent-tools",
"crates/rvAgent/rvagent-subagents",
"crates/rvAgent/rvagent-cli",
"crates/rvAgent/rvagent-acp",
"crates/rvAgent/rvagent-mcp",
"crates/rvAgent/rvagent-wasm",
# ETL pipeline example
"examples/train-discoveries",
# Spectral graph sparsification
"crates/ruvector-sparsifier",
"crates/ruvector-sparsifier-wasm",
# Consciousness metrics (IIT Φ, causal emergence)
"crates/ruvector-consciousness",
"crates/ruvector-consciousness-wasm",
"examples/cmb-consciousness",
"examples/gw-consciousness",
"examples/ecosystem-consciousness",
"examples/quantum-consciousness",
"examples/gene-consciousness",
"examples/climate-consciousness",
# JS bundle decompiler (ADR-135)
"crates/ruvector-decompiler",
"crates/ruvector-decompiler-wasm",
# DiskANN / Vamana (ADR-143)
"crates/ruvector-diskann",
"crates/ruvector-diskann-node",
# Boundary-first scientific discovery PoC
"examples/boundary-discovery",
# CMB Cold Spot boundary-first discovery
"examples/cmb-boundary-discovery",
# FRB population boundary discovery (CHIME-like data)
"examples/frb-boundary-discovery",
# Cosmic void boundary information content
"examples/void-boundary-discovery",
# Multi-regime temporal attractor boundary detection
"examples/temporal-attractor-discovery",
# Music genre boundary discovery via spectral graph bisection
"examples/music-boundary-discovery",
# Weather regime boundary detection (variance/correlation precedes temperature)
"examples/weather-boundary-discovery",
# Market regime boundary discovery via correlation structure
"examples/market-boundary-discovery",
# Health state boundary detection from wearable sensor data
"examples/health-boundary-discovery",
# SETI exotic signals gallery: boundary-first detection of sub-threshold signals
"examples/seti-exotic-signals",
# SETI boundary-first discovery: sub-noise signal detection via coherence graphs
"examples/seti-boundary-discovery",
# Earthquake precursor detection via inter-station correlation boundary shifts
"examples/earthquake-boundary-discovery",
# Pandemic outbreak detection 60 days before case counts via correlation boundaries
"examples/pandemic-boundary-discovery",
# Infrastructure failure prediction via sensor correlation boundaries
"examples/infrastructure-boundary-discovery",
# Pre-seizure detection via brain correlation boundary shifts
"examples/brain-boundary-discovery",
# Clinical-publication-grade pre-seizure detection report with CSV output
"examples/seizure-clinical-report",
# Closed-loop seizure detection + therapeutic response simulation
"examples/seizure-therapeutic-sim",
# Real EEG analysis: CHB-MIT PhysioNet data with boundary-first detection
"examples/real-eeg-analysis",
# Multi-seizure cross-patient analysis: all 7 chb01 seizures
"examples/real-eeg-multi-seizure",
]
resolver = "2"
[workspace.package]
version = "2.2.0"
edition = "2021"
rust-version = "1.77"
license = "MIT"
authors = ["Ruvector Team"]
repository = "https://github.com/ruvnet/ruvector"
[workspace.dependencies]
# Core functionality
redb = "2.1"
memmap2 = "0.9"
hnsw_rs = "0.3"
simsimd = "5.9"
rayon = "1.10"
crossbeam = "0.8"
# Serialization
rkyv = "0.8"
bincode = { version = "2.0.0-rc.3", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
# Node.js bindings
napi = { version = "2.16", default-features = false, features = ["napi9", "async", "tokio_rt"] }
napi-derive = "2.16"
# WASM
wasm-bindgen = "0.2"
wasm-bindgen-futures = "0.4"
js-sys = "0.3"
web-sys = { version = "0.3", features = ["Worker", "MessagePort", "console"] }
getrandom = { version = "0.3", features = ["wasm_js"] }
# Async runtime
tokio = { version = "1.41", features = ["rt-multi-thread", "sync", "macros"] }
futures = "0.3"
# Error handling and utilities
thiserror = "2.0"
anyhow = "1.0"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# Math and numerics
nalgebra = { version = "0.33", default-features = false, features = ["std"] }
ndarray = "0.16"
rand = "0.8"
rand_distr = "0.4"
# Time and UUID
chrono = { version = "0.4", features = ["serde"] }
uuid = { version = "1.11", features = ["v4", "serde", "js"] }
# CLI
clap = { version = "4.5", features = ["derive", "cargo"] }
indicatif = "0.17"
console = "0.15"
# Testing and benchmarking
criterion = { version = "0.5", features = ["html_reports"] }
proptest = "1.5"
mockall = "0.13"
# Formal verification
lean-agentic = "=0.1.0"
# Performance
dashmap = "6.1"
parking_lot = "0.12"
once_cell = "1.20"
[profile.release]
opt-level = 3
lto = "fat"
codegen-units = 1
strip = true
panic = "unwind"
[profile.bench]
inherits = "release"
debug = true
[profile.dev]
opt-level = 0
debug = true
[profile.test]
# Patch hnsw_rs to use rand 0.8 instead of 0.9 for WASM compatibility
# This resolves the getrandom version conflict (0.2 vs 0.3)
[patch.crates-io]
hnsw_rs = { path = "./patches/hnsw_rs" }