mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-06-01 06:10:31 +00:00
feat(rvf): complete ADR-032 phases 1-3 — epoch, lease, ID map, MCP tools, compat tests
Phase 2 Rust: full epoch reconciliation (EpochTracker with AtomicU64, 23 tests), writer lease with file lock and PID-based stale detection (12 tests), direct ID mapping trait with DirectIdMap and OffsetIdMap (20 tests). Phase 2 JS: createWithRvf/saveToRvf/loadFromRvf factories, BrowserWriterLease with IndexedDB heartbeat, rvf-migrate and rvf-rebuild CLI commands, epoch sync helpers. +541 lines to index.ts, new cli-rvf.ts (363 lines). Phase 3: 3 MCP rvlite tools (rvlite_sql, rvlite_cypher, rvlite_sparql), CI wasm-dedup-check workflow, 6 cross-platform compat tests, shared peer dep. Phase 1: 4 RVF smoke integration tests (full lifecycle, cosine, multi-restart, metadata). Node.js CLI smoke test script. 81 new Rust tests passing. ADR-032 checklist fully complete. Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
7dca3a4406
commit
de04713621
21 changed files with 5280 additions and 64 deletions
26
.github/workflows/wasm-dedup-check.yml
vendored
Normal file
26
.github/workflows/wasm-dedup-check.yml
vendored
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
# Guards against the RVF WASM binary being installed more than once under
# npm/node_modules. The job fails when more than one copy of
# rvf_wasm_bg.wasm is found after `npm install`.
name: WASM Dedup Check

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  check-wasm-dedup:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
      # Install from the npm/ directory so node_modules is populated there.
      - run: npm install
        working-directory: npm
      - name: Check for duplicate WASM artifacts
        run: |
          # Count every copy of the WASM binary; a missing node_modules
          # directory is tolerated (find errors are discarded) and counts as 0.
          count=$(find node_modules -name "rvf_wasm_bg.wasm" 2>/dev/null | wc -l)
          if [ "$count" -gt 1 ]; then
            echo "ERROR: Found $count copies of rvf_wasm_bg.wasm"
            # List the offending paths to make the duplication easy to trace.
            find node_modules -name "rvf_wasm_bg.wasm"
            exit 1
          fi
          echo "OK: $count WASM artifact(s) found"
        working-directory: npm
|
||||
11
Cargo.lock
generated
11
Cargo.lock
generated
|
|
@ -2671,6 +2671,16 @@ dependencies = [
|
|||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs2"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-cprng"
|
||||
version = "0.1.1"
|
||||
|
|
@ -9558,6 +9568,7 @@ version = "0.3.0"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"console_error_panic_hook",
|
||||
"fs2",
|
||||
"getrandom 0.2.16",
|
||||
"js-sys",
|
||||
"once_cell",
|
||||
|
|
|
|||
1
crates/rvf/Cargo.lock
generated
1
crates/rvf/Cargo.lock
generated
|
|
@ -1725,6 +1725,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"ed25519-dalek",
|
||||
"rand",
|
||||
"rvf-adapter-rvlite",
|
||||
"rvf-crypto",
|
||||
"rvf-index",
|
||||
"rvf-manifest",
|
||||
|
|
|
|||
461
crates/rvf/tests/rvf-integration/tests/cross_platform_compat.rs
Normal file
461
crates/rvf/tests/rvf-integration/tests/cross_platform_compat.rs
Normal file
|
|
@ -0,0 +1,461 @@
|
|||
//! Cross-platform RVF compatibility tests.
|
||||
//!
|
||||
//! Verifies that RVF stores can be serialized to bytes, transferred across
|
||||
//! boundaries (simulating cross-platform exchange), and re-imported with
|
||||
//! identical query results. Tests all three distance metrics and verifies
|
||||
//! segment header preservation across the round-trip.
|
||||
|
||||
use rvf_runtime::options::{DistanceMetric, QueryOptions, RvfOptions};
|
||||
use rvf_runtime::RvfStore;
|
||||
use rvf_types::{SegmentType, SEGMENT_HEADER_SIZE, SEGMENT_MAGIC};
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Deterministic pseudo-random vector generation using a 64-bit LCG.
///
/// Each component is taken from the upper 32 bits of the generator state,
/// scaled by `u32::MAX` and shifted down by 0.5, so components lie in
/// `[-0.5, 0.5]`. The same `(dim, seed)` pair always yields the same vector.
fn random_vector(dim: usize, seed: u64) -> Vec<f32> {
    let mut state = seed;
    (0..dim)
        .map(|_| {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            // Keep the top 32 bits so the quotient spans the whole [0, 1]
            // range. Shifting by 33 leaves only 31 bits, which caps the
            // quotient at 0.5 and makes every component non-positive.
            ((state >> 32) as f32) / (u32::MAX as f32) - 0.5
        })
        .collect()
}
|
||||
|
||||
fn make_options(dim: u16, metric: DistanceMetric) -> RvfOptions {
|
||||
RvfOptions {
|
||||
dimension: dim,
|
||||
metric,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Load the complete contents of the file at `path` into memory.
///
/// Panics if the file cannot be opened or read.
fn read_file_bytes(path: &std::path::Path) -> Vec<u8> {
    let mut contents = Vec::new();
    fs::File::open(path)
        .unwrap()
        .read_to_end(&mut contents)
        .unwrap();
    contents
}
|
||||
|
||||
/// Scan the file bytes for all segment headers and return their offsets and types.
|
||||
fn scan_segment_headers(file_bytes: &[u8]) -> Vec<(usize, u8, u64, u64)> {
|
||||
let magic_bytes = SEGMENT_MAGIC.to_le_bytes();
|
||||
let mut results = Vec::new();
|
||||
|
||||
if file_bytes.len() < SEGMENT_HEADER_SIZE {
|
||||
return results;
|
||||
}
|
||||
|
||||
let last_possible = file_bytes.len().saturating_sub(SEGMENT_HEADER_SIZE);
|
||||
for i in 0..=last_possible {
|
||||
if file_bytes[i..i + 4] == magic_bytes {
|
||||
let seg_type = file_bytes[i + 5];
|
||||
let seg_id = u64::from_le_bytes(
|
||||
file_bytes[i + 0x08..i + 0x10].try_into().unwrap(),
|
||||
);
|
||||
let payload_len = u64::from_le_bytes(
|
||||
file_bytes[i + 0x10..i + 0x18].try_into().unwrap(),
|
||||
);
|
||||
results.push((i, seg_type, seg_id, payload_len));
|
||||
}
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TEST 1: Cosine metric export/import round-trip
|
||||
// ---------------------------------------------------------------------------
|
||||
#[test]
fn cross_platform_cosine_round_trip() {
    // Creates a store configured with the cosine metric, copies its file
    // byte-for-byte to a new path, and checks that the top-10 neighbours of a
    // fixed query are the same from both files.
    //
    // NOTE(review): both sides are queried through `open_readonly`. If the
    // metric is not persisted across open (as the smoke test's module docs
    // state), both queries may run under the default metric rather than
    // cosine -- confirm.
    let dir = TempDir::new().unwrap();
    let dim: u16 = 32;
    let num_vectors: usize = 200;

    // Phase 1: Create store and populate with vectors.
    let original_path = dir.path().join("original_cosine.rvf");
    let query = random_vector(dim as usize, 999);

    {
        let mut store =
            RvfStore::create(&original_path, make_options(dim, DistanceMetric::Cosine)).unwrap();

        let vectors: Vec<Vec<f32>> = (0..num_vectors)
            .map(|i| random_vector(dim as usize, i as u64 * 7 + 3))
            .collect();
        let refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let ids: Vec<u64> = (1..=num_vectors as u64).collect();
        store.ingest_batch(&refs, &ids, None).unwrap();
        store.close().unwrap();
    }

    // Query original for baseline results.
    let original_results = {
        let store = RvfStore::open_readonly(&original_path).unwrap();
        let hits = store.query(&query, 10, &QueryOptions::default()).unwrap();
        assert!(!hits.is_empty(), "original query should return results");
        store.close().unwrap();
        hits
    };

    // Phase 2: Export to bytes.
    let exported_bytes = read_file_bytes(&original_path);
    assert!(!exported_bytes.is_empty(), "exported bytes should not be empty");

    // Phase 3: Re-import from bytes at a new location.
    let reimported_path = dir.path().join("reimported_cosine.rvf");
    fs::write(&reimported_path, &exported_bytes).unwrap();

    // Phase 4: Open re-imported store and verify results match.
    {
        let store = RvfStore::open_readonly(&reimported_path).unwrap();
        let reimported_results = store.query(&query, 10, &QueryOptions::default()).unwrap();

        assert_eq!(
            original_results.len(),
            reimported_results.len(),
            "result count mismatch after re-import"
        );

        // Compare rank by rank; the rank is reported so a failure can be
        // located in the result list.
        for (pos, (orig, reimp)) in original_results
            .iter()
            .zip(reimported_results.iter())
            .enumerate()
        {
            assert_eq!(orig.id, reimp.id, "ID mismatch at position {pos}");
            assert!(
                (orig.distance - reimp.distance).abs() < 1e-6,
                "distance mismatch for id {}: {} vs {} (delta={})",
                orig.id,
                orig.distance,
                reimp.distance,
                (orig.distance - reimp.distance).abs()
            );
        }

        let status = store.status();
        assert_eq!(
            status.total_vectors, num_vectors as u64,
            "re-imported store should have same vector count"
        );
        store.close().unwrap();
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TEST 2: Euclidean (L2) metric export/import round-trip
|
||||
// ---------------------------------------------------------------------------
|
||||
#[test]
fn cross_platform_l2_round_trip() {
    // Round-trips a store configured with the L2 metric through raw bytes and
    // checks that the top-10 neighbours of a fixed query are unchanged.
    let tmp = TempDir::new().unwrap();
    let dim: u16 = 16;
    let num_vectors: usize = 100;

    let original_path = tmp.path().join("original_l2.rvf");
    let query = random_vector(dim as usize, 42);

    // Build and populate the source store, then close it.
    {
        let mut writer =
            RvfStore::create(&original_path, make_options(dim, DistanceMetric::L2)).unwrap();

        let vectors: Vec<Vec<f32>> = (0..num_vectors)
            .map(|n| random_vector(dim as usize, n as u64 * 11 + 5))
            .collect();
        let slices: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
        let ids: Vec<u64> = (1..=num_vectors as u64).collect();
        writer.ingest_batch(&slices, &ids, None).unwrap();
        writer.close().unwrap();
    }

    // Baseline results from the original file.
    let original_results = {
        let reader = RvfStore::open_readonly(&original_path).unwrap();
        let hits = reader.query(&query, 10, &QueryOptions::default()).unwrap();
        reader.close().unwrap();
        hits
    };

    // Copy the raw bytes to a second path (the simulated transfer).
    let reimported_path = tmp.path().join("reimported_l2.rvf");
    let exported_bytes = read_file_bytes(&original_path);
    fs::write(&reimported_path, &exported_bytes).unwrap();

    // Results from the copy must match the baseline rank by rank.
    let reader = RvfStore::open_readonly(&reimported_path).unwrap();
    let reimported_results = reader.query(&query, 10, &QueryOptions::default()).unwrap();

    assert_eq!(original_results.len(), reimported_results.len());
    for (before, after) in original_results.iter().zip(&reimported_results) {
        assert_eq!(before.id, after.id);
        let delta = (before.distance - after.distance).abs();
        assert!(
            delta < 1e-6,
            "L2 distance mismatch for id {}: {} vs {}",
            before.id,
            before.distance,
            after.distance
        );
    }
    reader.close().unwrap();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TEST 3: InnerProduct (dot product) metric export/import round-trip
|
||||
// ---------------------------------------------------------------------------
|
||||
#[test]
fn cross_platform_inner_product_round_trip() {
    // Round-trips a store configured with the inner-product metric through raw
    // bytes and checks that the top-10 neighbours of a fixed query are
    // unchanged.
    let tmp = TempDir::new().unwrap();
    let dim: u16 = 64;
    let num_vectors: usize = 150;

    let original_path = tmp.path().join("original_ip.rvf");
    let query = random_vector(dim as usize, 7777);

    // Build and populate the source store, then close it.
    {
        let options = make_options(dim, DistanceMetric::InnerProduct);
        let mut writer = RvfStore::create(&original_path, options).unwrap();

        let vectors: Vec<Vec<f32>> = (0..num_vectors)
            .map(|n| random_vector(dim as usize, n as u64 * 13 + 1))
            .collect();
        let slices: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
        let ids: Vec<u64> = (1..=num_vectors as u64).collect();
        writer.ingest_batch(&slices, &ids, None).unwrap();
        writer.close().unwrap();
    }

    // Baseline results from the original file.
    let original_results = {
        let reader = RvfStore::open_readonly(&original_path).unwrap();
        let hits = reader.query(&query, 10, &QueryOptions::default()).unwrap();
        reader.close().unwrap();
        hits
    };

    // Copy the raw bytes to a second path (the simulated transfer).
    let reimported_path = tmp.path().join("reimported_ip.rvf");
    let exported_bytes = read_file_bytes(&original_path);
    fs::write(&reimported_path, &exported_bytes).unwrap();

    // Results from the copy must match the baseline rank by rank.
    let reader = RvfStore::open_readonly(&reimported_path).unwrap();
    let reimported_results = reader.query(&query, 10, &QueryOptions::default()).unwrap();

    assert_eq!(original_results.len(), reimported_results.len());
    for (before, after) in original_results.iter().zip(&reimported_results) {
        assert_eq!(before.id, after.id);
        let delta = (before.distance - after.distance).abs();
        assert!(
            delta < 1e-6,
            "InnerProduct distance mismatch for id {}: {} vs {}",
            before.id,
            before.distance,
            after.distance
        );
    }
    reader.close().unwrap();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TEST 4: Segment headers are preserved across serialize/deserialize
|
||||
// ---------------------------------------------------------------------------
|
||||
#[test]
fn cross_platform_segment_headers_preserved() {
    // Writes a small store, copies its bytes to a second file, and checks that
    // a magic-number scan finds the same headers (offset, type, id, payload
    // length) in both files. Finally confirms the copy can be opened and
    // queried.
    let tmp = TempDir::new().unwrap();
    let dim: u16 = 8;

    let original_path = tmp.path().join("seg_headers.rvf");

    {
        let mut writer =
            RvfStore::create(&original_path, make_options(dim, DistanceMetric::L2)).unwrap();

        let vectors: Vec<Vec<f32>> = (0..50u64)
            .map(|seed| random_vector(dim as usize, seed))
            .collect();
        let slices: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
        let ids: Vec<u64> = (1..=50).collect();
        writer.ingest_batch(&slices, &ids, None).unwrap();
        writer.close().unwrap();
    }

    // Headers found in the original file.
    let original_bytes = read_file_bytes(&original_path);
    let original_segments = scan_segment_headers(&original_bytes);
    assert!(
        !original_segments.is_empty(),
        "original file should contain at least one segment"
    );

    // Simulated cross-platform transfer: write the same bytes elsewhere.
    let reimported_path = tmp.path().join("seg_headers_copy.rvf");
    fs::write(&reimported_path, &original_bytes).unwrap();

    // Headers found in the copy.
    let reimported_segments = scan_segment_headers(&read_file_bytes(&reimported_path));

    // The same number of headers must be found ...
    assert_eq!(
        original_segments.len(),
        reimported_segments.len(),
        "segment count mismatch: {} vs {}",
        original_segments.len(),
        reimported_segments.len()
    );

    // ... and each one must agree field by field.
    for (i, (orig, reimp)) in original_segments
        .iter()
        .zip(&reimported_segments)
        .enumerate()
    {
        let (orig_offset, orig_type, orig_id, orig_len) = *orig;
        let (copy_offset, copy_type, copy_id, copy_len) = *reimp;

        assert_eq!(
            orig_offset, copy_offset,
            "segment {i}: offset mismatch ({} vs {})",
            orig_offset, copy_offset
        );
        assert_eq!(
            orig_type, copy_type,
            "segment {i}: type mismatch ({:#x} vs {:#x})",
            orig_type, copy_type
        );
        assert_eq!(
            orig_id, copy_id,
            "segment {i}: id mismatch ({} vs {})",
            orig_id, copy_id
        );
        assert_eq!(
            orig_len, copy_len,
            "segment {i}: payload_length mismatch ({} vs {})",
            orig_len, copy_len
        );
    }

    // The copy must still open and answer queries.
    let reader = RvfStore::open_readonly(&reimported_path).unwrap();
    assert_eq!(reader.status().total_vectors, 50);

    let query = random_vector(dim as usize, 25);
    let hits = reader.query(&query, 5, &QueryOptions::default()).unwrap();
    assert_eq!(hits.len(), 5, "re-imported store should return query results");
    reader.close().unwrap();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TEST 5: All three metrics produce consistent results after round-trip
|
||||
// ---------------------------------------------------------------------------
|
||||
#[test]
fn cross_platform_all_metrics_consistent() {
    // Repeats the export/re-import comparison once per distance metric, using
    // the same vectors and query each time.
    let tmp = TempDir::new().unwrap();
    let dim: u16 = 16;
    let num_vectors: usize = 50;

    let metrics = [
        (DistanceMetric::L2, "l2"),
        (DistanceMetric::Cosine, "cosine"),
        (DistanceMetric::InnerProduct, "dotproduct"),
    ];

    for &(metric, label) in metrics.iter() {
        let original_path = tmp.path().join(format!("all_{label}.rvf"));
        let query = random_vector(dim as usize, 12345);

        // Create and populate the store for this metric.
        {
            let mut writer = RvfStore::create(&original_path, make_options(dim, metric)).unwrap();

            let vectors: Vec<Vec<f32>> = (0..num_vectors)
                .map(|n| random_vector(dim as usize, n as u64 * 17 + 2))
                .collect();
            let slices: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
            let ids: Vec<u64> = (1..=num_vectors as u64).collect();
            writer.ingest_batch(&slices, &ids, None).unwrap();
            writer.close().unwrap();
        }

        // Baseline results from the original file.
        let original_results = {
            let reader = RvfStore::open_readonly(&original_path).unwrap();
            let hits = reader.query(&query, 10, &QueryOptions::default()).unwrap();
            reader.close().unwrap();
            hits
        };

        // Round-trip through bytes.
        let reimported_path = tmp.path().join(format!("all_{label}_copy.rvf"));
        let bytes = read_file_bytes(&original_path);
        fs::write(&reimported_path, &bytes).unwrap();

        // Results from the copy must match within tolerance.
        let reader = RvfStore::open_readonly(&reimported_path).unwrap();
        let reimported_results = reader.query(&query, 10, &QueryOptions::default()).unwrap();

        assert_eq!(
            original_results.len(),
            reimported_results.len(),
            "{label}: result count mismatch"
        );

        for (before, after) in original_results.iter().zip(&reimported_results) {
            assert_eq!(before.id, after.id, "{label}: ID mismatch");
            assert!(
                (before.distance - after.distance).abs() < 1e-6,
                "{label}: distance mismatch for id {}: {} vs {} (delta={})",
                before.id,
                before.distance,
                after.distance,
                (before.distance - after.distance).abs()
            );
        }
        reader.close().unwrap();
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TEST 6: Byte-level file identity after export/import
|
||||
// ---------------------------------------------------------------------------
|
||||
#[test]
fn cross_platform_byte_identical_transfer() {
    // Writes a tiny store, copies its bytes to a second path, and checks that
    // reading the copy back yields exactly the same bytes.
    let dir = TempDir::new().unwrap();
    let dim: u16 = 4;

    let original_path = dir.path().join("byte_ident.rvf");

    {
        let mut store =
            RvfStore::create(&original_path, make_options(dim, DistanceMetric::L2)).unwrap();

        // Ten constant vectors: vector `i` has every component equal to `i`.
        let vectors: Vec<Vec<f32>> = (0..10)
            .map(|i| vec![i as f32; dim as usize])
            .collect();
        let refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let ids: Vec<u64> = (1..=10).collect();
        store.ingest_batch(&refs, &ids, None).unwrap();
        store.close().unwrap();
    }

    // Read original bytes.
    let original_bytes = read_file_bytes(&original_path);

    // Write to new location.
    let copy_path = dir.path().join("byte_ident_copy.rvf");
    fs::write(&copy_path, &original_bytes).unwrap();

    // Read copy bytes.
    let copy_bytes = read_file_bytes(&copy_path);

    // Bytes must be identical. The length is checked first so a size
    // difference produces a short message instead of a full byte dump.
    assert_eq!(
        original_bytes.len(),
        copy_bytes.len(),
        "file sizes should be identical"
    );
    assert_eq!(
        original_bytes, copy_bytes,
        "file bytes should be identical after transfer"
    );
}
|
||||
606
crates/rvf/tests/rvf-integration/tests/rvf_smoke_test.rs
Normal file
606
crates/rvf/tests/rvf-integration/tests/rvf_smoke_test.rs
Normal file
|
|
@ -0,0 +1,606 @@
|
|||
//! End-to-end RVF smoke test -- full lifecycle verification.
|
||||
//!
|
||||
//! Exercises the complete RVF pipeline through 15 steps:
|
||||
//! 1. Create a new store (dim=128; the lifecycle test uses the L2 metric, see NOTE below)
|
||||
//! 2. Ingest 100 random vectors with metadata
|
||||
//! 3. Query for 10 nearest neighbors of a known vector
|
||||
//! 4. Verify results are sorted and distances are valid (non-negative for L2 in the lifecycle test; 0.0..2.0 for cosine)
|
||||
//! 5. Close the store
|
||||
//! 6. Reopen the store (simulating process restart)
|
||||
//! 7. Query again with the same vector
|
||||
//! 8. Verify results match the first query exactly (persistence verified)
|
||||
//! 9. Delete some vectors
|
||||
//! 10. Compact the store
|
||||
//! 11. Verify deleted vectors no longer appear in results
|
||||
//! 12. Derive a child store
|
||||
//! 13. Verify child can be queried independently
|
||||
//! 14. Verify segment listing works on both parent and child
|
||||
//! 15. Clean up temporary files
|
||||
//!
|
||||
//! NOTE: The `DistanceMetric` is not persisted in the manifest, so after
|
||||
//! `RvfStore::open()` the metric defaults to L2. The lifecycle test therefore
|
||||
//! uses L2 for the cross-restart comparison (steps 5-8), while cosine-specific
|
||||
//! assertions are exercised in a dedicated single-session test.
|
||||
|
||||
use rvf_runtime::options::{
|
||||
DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
|
||||
};
|
||||
use rvf_runtime::RvfStore;
|
||||
use rvf_types::DerivationType;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Deterministic pseudo-random vector generation using a 64-bit LCG.
///
/// Produces values in `[-0.5, 0.5]`: each component is the upper 32 bits of
/// the generator state divided by `u32::MAX`, shifted down by 0.5. The same
/// `(dim, seed)` pair always yields the same vector.
fn random_vector(dim: usize, seed: u64) -> Vec<f32> {
    let mut out = Vec::with_capacity(dim);
    let mut state = seed;
    for _ in 0..dim {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        // Keep the top 32 bits so the quotient covers the whole [0, 1] range.
        // A shift of 33 leaves only 31 bits, which caps the quotient at 0.5
        // and makes every component non-positive.
        let unit = ((state >> 32) as f32) / (u32::MAX as f32);
        out.push(unit - 0.5);
    }
    out
}
|
||||
|
||||
/// Scale `v` in place to unit Euclidean length.
///
/// Vectors whose norm is not greater than `f32::EPSILON` (including the empty
/// and all-zero vectors) are left untouched, which avoids dividing by zero.
fn normalize(v: &mut [f32]) {
    let squared_sum: f32 = v.iter().map(|c| c * c).sum::<f32>();
    let norm = squared_sum.sqrt();
    if norm > f32::EPSILON {
        v.iter_mut().for_each(|c| *c /= norm);
    }
}
|
||||
|
||||
/// Generate a normalized random vector suitable for cosine queries.
|
||||
fn random_unit_vector(dim: usize, seed: u64) -> Vec<f32> {
|
||||
let mut v = random_vector(dim, seed);
|
||||
normalize(&mut v);
|
||||
v
|
||||
}
|
||||
|
||||
fn make_options(dim: u16, metric: DistanceMetric) -> RvfOptions {
|
||||
RvfOptions {
|
||||
dimension: dim,
|
||||
metric,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Full lifecycle smoke test (L2 metric for cross-restart consistency)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn rvf_smoke_full_lifecycle() {
|
||||
let dir = TempDir::new().expect("failed to create temp dir");
|
||||
let store_path = dir.path().join("smoke_lifecycle.rvf");
|
||||
let child_path = dir.path().join("smoke_child.rvf");
|
||||
|
||||
let dim: u16 = 128;
|
||||
let k: usize = 10;
|
||||
let vector_count: usize = 100;
|
||||
|
||||
// Use L2 metric for the lifecycle test because the metric is not persisted
|
||||
// in the manifest. After reopen, the store defaults to L2, so using L2
|
||||
// throughout ensures cross-restart distance comparisons are exact.
|
||||
let options = make_options(dim, DistanceMetric::L2);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 1: Create a new RVF store with dimension 128 (L2 metric, see above)
|
||||
// -----------------------------------------------------------------------
|
||||
let mut store = RvfStore::create(&store_path, options.clone())
|
||||
.expect("step 1: failed to create store");
|
||||
|
||||
// Verify initial state.
|
||||
let initial_status = store.status();
|
||||
assert_eq!(initial_status.total_vectors, 0, "step 1: new store should be empty");
|
||||
assert!(!initial_status.read_only, "step 1: new store should not be read-only");
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 2: Ingest 100 random vectors with metadata
|
||||
// -----------------------------------------------------------------------
|
||||
let vectors: Vec<Vec<f32>> = (0..vector_count as u64)
|
||||
.map(|i| random_vector(dim as usize, i * 17 + 5))
|
||||
.collect();
|
||||
let vec_refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
|
||||
let ids: Vec<u64> = (1..=vector_count as u64).collect();
|
||||
|
||||
// One metadata entry per vector: field_id=0, value=category string.
|
||||
let metadata: Vec<MetadataEntry> = ids
|
||||
.iter()
|
||||
.map(|&id| MetadataEntry {
|
||||
field_id: 0,
|
||||
value: MetadataValue::String(format!("group_{}", id % 5)),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let ingest_result = store
|
||||
.ingest_batch(&vec_refs, &ids, Some(&metadata))
|
||||
.expect("step 2: ingest failed");
|
||||
|
||||
assert_eq!(
|
||||
ingest_result.accepted, vector_count as u64,
|
||||
"step 2: all {} vectors should be accepted",
|
||||
vector_count,
|
||||
);
|
||||
assert_eq!(ingest_result.rejected, 0, "step 2: no vectors should be rejected");
|
||||
assert!(ingest_result.epoch > 0, "step 2: epoch should advance after ingest");
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 3: Query for 10 nearest neighbors of a known vector
|
||||
// -----------------------------------------------------------------------
|
||||
// Use vector with id=50 as the query (seed = 49 * 17 + 5 = 838).
|
||||
let query_vec = random_vector(dim as usize, 49 * 17 + 5);
|
||||
let results_first = store
|
||||
.query(&query_vec, k, &QueryOptions::default())
|
||||
.expect("step 3: query failed");
|
||||
|
||||
assert_eq!(
|
||||
results_first.len(),
|
||||
k,
|
||||
"step 3: should return exactly {} results",
|
||||
k,
|
||||
);
|
||||
|
||||
// The first result should be the exact match (id=50).
|
||||
assert_eq!(
|
||||
results_first[0].id, 50,
|
||||
"step 3: exact match vector should be first result",
|
||||
);
|
||||
assert!(
|
||||
results_first[0].distance < 1e-5,
|
||||
"step 3: exact match distance should be near zero, got {}",
|
||||
results_first[0].distance,
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 4: Verify results are sorted by distance and distances are valid
|
||||
// (L2 distances are non-negative)
|
||||
// -----------------------------------------------------------------------
|
||||
for i in 1..results_first.len() {
|
||||
assert!(
|
||||
results_first[i].distance >= results_first[i - 1].distance,
|
||||
"step 4: results not sorted at position {}: {} > {}",
|
||||
i,
|
||||
results_first[i - 1].distance,
|
||||
results_first[i].distance,
|
||||
);
|
||||
}
|
||||
for r in &results_first {
|
||||
assert!(
|
||||
r.distance >= 0.0,
|
||||
"step 4: L2 distance {} should be non-negative",
|
||||
r.distance,
|
||||
);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 5: Close the store
|
||||
// -----------------------------------------------------------------------
|
||||
store.close().expect("step 5: close failed");
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 6: Reopen the store (simulating process restart)
|
||||
// -----------------------------------------------------------------------
|
||||
let store = RvfStore::open(&store_path).expect("step 6: reopen failed");
|
||||
let reopen_status = store.status();
|
||||
assert_eq!(
|
||||
reopen_status.total_vectors, vector_count as u64,
|
||||
"step 6: all {} vectors should persist after reopen",
|
||||
vector_count,
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 7: Query again with the same vector
|
||||
// -----------------------------------------------------------------------
|
||||
let results_second = store
|
||||
.query(&query_vec, k, &QueryOptions::default())
|
||||
.expect("step 7: query after reopen failed");
|
||||
|
||||
assert_eq!(
|
||||
results_second.len(),
|
||||
k,
|
||||
"step 7: should return exactly {} results after reopen",
|
||||
k,
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 8: Verify results match the first query exactly (persistence)
|
||||
//
|
||||
// After reopen, the internal iteration order of vectors may differ, which
|
||||
// can affect tie-breaking in the k-NN heap. We therefore compare:
|
||||
// (a) the set of result IDs must be identical,
|
||||
// (b) distances for each ID must match within floating-point tolerance,
|
||||
// (c) result count must be the same.
|
||||
// -----------------------------------------------------------------------
|
||||
assert_eq!(
|
||||
results_first.len(),
|
||||
results_second.len(),
|
||||
"step 8: result count should match across restart",
|
||||
);
|
||||
|
||||
// Build a map of id -> distance for comparison.
|
||||
let first_map: std::collections::HashMap<u64, f32> = results_first
|
||||
.iter()
|
||||
.map(|r| (r.id, r.distance))
|
||||
.collect();
|
||||
let second_map: std::collections::HashMap<u64, f32> = results_second
|
||||
.iter()
|
||||
.map(|r| (r.id, r.distance))
|
||||
.collect();
|
||||
|
||||
// Verify the exact same IDs appear in both result sets.
|
||||
let mut first_ids: Vec<u64> = first_map.keys().copied().collect();
|
||||
let mut second_ids: Vec<u64> = second_map.keys().copied().collect();
|
||||
first_ids.sort();
|
||||
second_ids.sort();
|
||||
assert_eq!(
|
||||
first_ids, second_ids,
|
||||
"step 8: result ID sets must match across restart",
|
||||
);
|
||||
|
||||
// Verify distances match per-ID within tolerance.
|
||||
for &id in &first_ids {
|
||||
let d1 = first_map[&id];
|
||||
let d2 = second_map[&id];
|
||||
assert!(
|
||||
(d1 - d2).abs() < 1e-5,
|
||||
"step 8: distance mismatch for id={}: {} vs {} (pre vs post restart)",
|
||||
id, d1, d2,
|
||||
);
|
||||
}
|
||||
|
||||
// Need a mutable store for delete/compact. Drop the read-write handle and
|
||||
// reopen it mutably.
|
||||
store.close().expect("step 8: close for mutable reopen failed");
|
||||
let mut store = RvfStore::open(&store_path).expect("step 8: mutable reopen failed");
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 9: Delete some vectors (ids 1..=10)
|
||||
// -----------------------------------------------------------------------
|
||||
let delete_ids: Vec<u64> = (1..=10).collect();
|
||||
let del_result = store
|
||||
.delete(&delete_ids)
|
||||
.expect("step 9: delete failed");
|
||||
|
||||
assert_eq!(
|
||||
del_result.deleted, 10,
|
||||
"step 9: should have deleted 10 vectors",
|
||||
);
|
||||
assert!(
|
||||
del_result.epoch > reopen_status.current_epoch,
|
||||
"step 9: epoch should advance after delete",
|
||||
);
|
||||
|
||||
// Quick verification: deleted vectors should not appear in query.
|
||||
let post_delete_results = store
|
||||
.query(&query_vec, vector_count, &QueryOptions::default())
|
||||
.expect("step 9: post-delete query failed");
|
||||
|
||||
for r in &post_delete_results {
|
||||
assert!(
|
||||
r.id > 10,
|
||||
"step 9: deleted vector {} should not appear in results",
|
||||
r.id,
|
||||
);
|
||||
}
|
||||
assert_eq!(
|
||||
post_delete_results.len(),
|
||||
vector_count - 10,
|
||||
"step 9: should have {} results after deleting 10",
|
||||
vector_count - 10,
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 10: Compact the store
|
||||
// -----------------------------------------------------------------------
|
||||
let pre_compact_epoch = store.status().current_epoch;
|
||||
let compact_result = store.compact().expect("step 10: compact failed");
|
||||
|
||||
assert!(
|
||||
compact_result.segments_compacted > 0 || compact_result.bytes_reclaimed > 0,
|
||||
"step 10: compaction should reclaim space",
|
||||
);
|
||||
assert!(
|
||||
compact_result.epoch > pre_compact_epoch,
|
||||
"step 10: epoch should advance after compact",
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 11: Verify deleted vectors no longer appear in results
|
||||
// -----------------------------------------------------------------------
|
||||
let post_compact_results = store
|
||||
.query(&query_vec, vector_count, &QueryOptions::default())
|
||||
.expect("step 11: post-compact query failed");
|
||||
|
||||
for r in &post_compact_results {
|
||||
assert!(
|
||||
r.id > 10,
|
||||
"step 11: deleted vector {} appeared after compaction",
|
||||
r.id,
|
||||
);
|
||||
}
|
||||
assert_eq!(
|
||||
post_compact_results.len(),
|
||||
vector_count - 10,
|
||||
"step 11: should still have {} results post-compact",
|
||||
vector_count - 10,
|
||||
);
|
||||
|
||||
// Verify post-compact status.
|
||||
let post_compact_status = store.status();
|
||||
assert_eq!(
|
||||
post_compact_status.total_vectors,
|
||||
(vector_count - 10) as u64,
|
||||
"step 11: status should reflect {} live vectors",
|
||||
vector_count - 10,
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 12: Derive a child store
|
||||
// -----------------------------------------------------------------------
|
||||
let child = store
|
||||
.derive(&child_path, DerivationType::Clone, Some(options.clone()))
|
||||
.expect("step 12: derive failed");
|
||||
|
||||
// Verify lineage.
|
||||
assert_eq!(
|
||||
child.lineage_depth(),
|
||||
1,
|
||||
"step 12: child lineage depth should be 1",
|
||||
);
|
||||
assert_eq!(
|
||||
child.parent_id(),
|
||||
store.file_id(),
|
||||
"step 12: child parent_id should match parent file_id",
|
||||
);
|
||||
assert_ne!(
|
||||
child.file_id(),
|
||||
store.file_id(),
|
||||
"step 12: child should have a distinct file_id",
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 13: Verify child can be queried independently
|
||||
// -----------------------------------------------------------------------
|
||||
// The child is a fresh derived store (no vectors copied by default via
|
||||
// derive -- only lineage metadata). Query should return empty or results
|
||||
// depending on whether vectors were inherited. We just verify it does not
|
||||
// panic and returns a valid response.
|
||||
let child_query = random_vector(dim as usize, 999);
|
||||
let child_results = child
|
||||
.query(&child_query, k, &QueryOptions::default())
|
||||
.expect("step 13: child query failed");
|
||||
|
||||
// Child is newly derived with no vectors of its own, so results should be empty.
|
||||
assert!(
|
||||
child_results.is_empty(),
|
||||
"step 13: freshly derived child should have no vectors, got {}",
|
||||
child_results.len(),
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 14: Verify segment listing works on both parent and child
|
||||
// -----------------------------------------------------------------------
|
||||
let parent_segments = store.segment_dir();
|
||||
assert!(
|
||||
!parent_segments.is_empty(),
|
||||
"step 14: parent should have at least one segment",
|
||||
);
|
||||
|
||||
let child_segments = child.segment_dir();
|
||||
assert!(
|
||||
!child_segments.is_empty(),
|
||||
"step 14: child should have at least one segment (manifest)",
|
||||
);
|
||||
|
||||
// Verify segment tuples have valid structure (seg_id > 0, type byte > 0).
|
||||
for &(seg_id, _offset, _len, seg_type) in parent_segments {
|
||||
assert!(seg_id > 0, "step 14: parent segment ID should be > 0");
|
||||
assert!(seg_type > 0, "step 14: parent segment type should be > 0");
|
||||
}
|
||||
for &(seg_id, _offset, _len, seg_type) in child_segments {
|
||||
assert!(seg_id > 0, "step 14: child segment ID should be > 0");
|
||||
assert!(seg_type > 0, "step 14: child segment type should be > 0");
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Step 15: Clean up temporary files
|
||||
// -----------------------------------------------------------------------
|
||||
child.close().expect("step 15: child close failed");
|
||||
store.close().expect("step 15: parent close failed");
|
||||
|
||||
// TempDir's Drop impl will remove the directory, but verify the files exist
|
||||
// before cleanup happens.
|
||||
assert!(
|
||||
store_path.exists(),
|
||||
"step 15: parent store file should exist before cleanup",
|
||||
);
|
||||
assert!(
|
||||
child_path.exists(),
|
||||
"step 15: child store file should exist before cleanup",
|
||||
);
|
||||
|
||||
// Explicitly drop the TempDir to trigger cleanup.
|
||||
drop(dir);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Additional focused smoke tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Cosine-metric smoke check: for unit-length inputs every returned distance
/// must lie inside [0.0, 2.0], and each result list must be ordered by
/// non-decreasing distance. Everything runs in one session (no close/reopen)
/// to sidestep the metric-not-persisted issue.
#[test]
fn smoke_cosine_distance_range() {
    let dim: u16 = 128;
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("cosine_range.rvf");

    let mut store = RvfStore::create(&path, make_options(dim, DistanceMetric::Cosine)).unwrap();

    // Populate the store with 50 unit vectors stored under ids 1..=50.
    let vectors: Vec<Vec<f32>> = (0..50)
        .map(|i| random_unit_vector(dim as usize, i * 31 + 3))
        .collect();
    let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
    let ids: Vec<u64> = (1..=50).collect();
    store.ingest_batch(&refs, &ids, None).unwrap();

    // Probe with a handful of differently seeded query vectors.
    for seed in [0, 42, 100, 999, 12345] {
        let q = random_unit_vector(dim as usize, seed);
        let results = store.query(&q, 50, &QueryOptions::default()).unwrap();

        // Range check on every hit.
        for hit in &results {
            assert!(
                hit.distance >= 0.0 && hit.distance <= 2.0,
                "cosine distance {} out of range [0.0, 2.0] for seed {}",
                hit.distance,
                seed,
            );
        }

        // Ordering check on each adjacent pair; the reported position is the
        // index of the later element of the pair.
        for (idx, (prev, next)) in results.iter().zip(results.iter().skip(1)).enumerate() {
            assert!(
                next.distance >= prev.distance,
                "results not sorted for seed {}: {} > {} at position {}",
                seed,
                prev.distance,
                next.distance,
                idx + 1,
            );
        }
    }

    store.close().unwrap();
}
|
||||
|
||||
/// Exercise persistence over several close/reopen cycles, with ingests,
/// deletes and a compaction interleaved between restarts. The L2 metric is
/// used for cross-restart consistency.
#[test]
fn smoke_multi_restart_persistence() {
    let dim: u16 = 128;
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("multi_restart.rvf");

    // IDs removed in cycle 2 and checked for absence in cycle 3.
    let deleted_ids: [u64; 10] = [5, 10, 15, 20, 25, 55, 60, 65, 70, 75];

    // Cycle 1: create the store and ingest the first 50 vectors.
    {
        let mut store = RvfStore::create(&path, make_options(dim, DistanceMetric::L2)).unwrap();

        let vectors: Vec<Vec<f32>> = (0..50)
            .map(|seed| random_vector(dim as usize, seed))
            .collect();
        let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
        let ids: Vec<u64> = (1..=50).collect();

        store.ingest_batch(&refs, &ids, None).unwrap();
        assert_eq!(store.status().total_vectors, 50);
        store.close().unwrap();
    }

    // Cycle 2: reopen, ingest 50 more, delete 10, close.
    {
        let mut store = RvfStore::open(&path).unwrap();
        assert_eq!(store.status().total_vectors, 50);

        let vectors: Vec<Vec<f32>> = (50..100)
            .map(|seed| random_vector(dim as usize, seed))
            .collect();
        let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
        let ids: Vec<u64> = (51..=100).collect();

        store.ingest_batch(&refs, &ids, None).unwrap();
        assert_eq!(store.status().total_vectors, 100);

        store.delete(&deleted_ids).unwrap();
        assert_eq!(store.status().total_vectors, 90);

        store.close().unwrap();
    }

    // Cycle 3: reopen, verify counts, compact, close.
    {
        let mut store = RvfStore::open(&path).unwrap();
        assert_eq!(
            store.status().total_vectors, 90,
            "cycle 3: 90 vectors should survive two restarts",
        );

        store.compact().unwrap();
        assert_eq!(store.status().total_vectors, 90);

        // A query asking for up to 100 hits must not surface any deleted ID.
        let q = random_vector(dim as usize, 42);
        let results = store.query(&q, 100, &QueryOptions::default()).unwrap();
        for hit in &results {
            assert!(
                !deleted_ids.contains(&hit.id),
                "cycle 3: deleted vector {} appeared after compact + restart",
                hit.id,
            );
        }

        store.close().unwrap();
    }

    // Cycle 4: final reopen (readonly), verify persistence survived compact.
    {
        let store = RvfStore::open_readonly(&path).unwrap();
        assert_eq!(
            store.status().total_vectors, 90,
            "cycle 4: 90 vectors should survive compact + restart",
        );
        assert!(store.status().read_only);
    }
}
|
||||
|
||||
/// Ingest a batch that carries one metadata entry per vector, then confirm
/// that an exact-match query returns the expected vector ID.
#[test]
fn smoke_metadata_and_ids() {
    let dim: u16 = 128;
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("meta_ids.rvf");

    let mut store = RvfStore::create(&path, make_options(dim, DistanceMetric::L2)).unwrap();

    // 100 vectors under ids 1..=100; the vector at index i uses seed i * 7 + 1.
    let vectors: Vec<Vec<f32>> = (0..100)
        .map(|i| random_vector(dim as usize, i * 7 + 1))
        .collect();
    let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
    let ids: Vec<u64> = (1..=100).collect();

    // One metadata entry per vector: field 0 holds the vector's own ID.
    let metadata: Vec<MetadataEntry> = ids
        .iter()
        .copied()
        .map(|id| MetadataEntry {
            field_id: 0,
            value: MetadataValue::U64(id),
        })
        .collect();

    let outcome = store.ingest_batch(&refs, &ids, Some(&metadata)).unwrap();
    assert_eq!(outcome.accepted, 100);
    assert_eq!(outcome.rejected, 0);

    // id=42 was ingested from index 41, so regenerate that exact vector.
    let query = random_vector(dim as usize, 41 * 7 + 1);
    let results = store.query(&query, 1, &QueryOptions::default()).unwrap();
    assert_eq!(results.len(), 1);

    let nearest = &results[0];
    assert_eq!(nearest.id, 42, "exact match should be id=42");
    assert!(nearest.distance < 1e-5);

    store.close().unwrap();
}
|
||||
|
|
@ -50,6 +50,7 @@ console_error_panic_hook = "0.1"
|
|||
# ===== RVF Backend (optional) =====
|
||||
rvf-runtime = { path = "../rvf/rvf-runtime", features = ["std"], optional = true }
|
||||
rvf-types = { path = "../rvf/rvf-types", features = ["std"], optional = true }
|
||||
fs2 = { version = "0.4", optional = true }
|
||||
|
||||
# ===== Standard Dependencies =====
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
|
@ -69,7 +70,7 @@ getrandom = { version = "0.2", features = ["js"] }
|
|||
|
||||
[features]
|
||||
default = []
|
||||
rvf-backend = ["dep:rvf-runtime", "dep:rvf-types"]
|
||||
rvf-backend = ["dep:rvf-runtime", "dep:rvf-types", "dep:fs2"]
|
||||
# Feature flags to be added later
|
||||
# sql = ["dep:sqlparser"]
|
||||
# sparql = []
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@
|
|||
//!
|
||||
//! On startup: compare epochs and rebuild the lagging side.
|
||||
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
/// Epoch number shared between the RVF store and the metadata store.
///
/// A thin newtype over `u64`; ordering and equality follow the wrapped value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Epoch(pub u64);
|
||||
|
|
@ -26,7 +28,35 @@ impl Epoch {
|
|||
}
|
||||
}
|
||||
|
||||
/// How the RVF epoch and the metadata epoch relate to each other.
///
/// The payload of the "ahead" variants is the absolute difference between
/// the two epoch values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EpochState {
    /// The two stores report the same epoch.
    Synchronized,
    /// The RVF store leads the metadata store by this many epochs.
    RvfAhead(u64),
    /// The metadata store leads the RVF store by this many epochs (anomalous).
    MetadataAhead(u64),
}
|
||||
|
||||
/// What the caller should do once the two epochs have been compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReconcileAction {
    /// The stores agree; nothing has to be reconciled.
    None,
    /// The metadata store is stale and must be rebuilt from the authoritative
    /// RVF store.
    RebuildMetadata,
    /// The RVF store is behind the metadata store; rebuild vectors from the
    /// RVF file. Not expected in normal operation -- it points at an earlier
    /// write that did not complete.
    RebuildFromRvf,
    /// The metadata store is ahead, which correct operation never produces.
    /// Log a warning and treat RVF as the source of truth.
    LogWarningTrustRvf,
}
|
||||
|
||||
/// Result of comparing epochs between RVF and metadata stores.
|
||||
///
|
||||
/// Kept for backward compatibility with existing callers.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ReconciliationAction {
|
||||
/// Both stores are in sync -- no action needed.
|
||||
|
|
@ -37,7 +67,46 @@ pub enum ReconciliationAction {
|
|||
TrustRvf { rvf_epoch: Epoch, metadata_epoch: Epoch },
|
||||
}
|
||||
|
||||
/// Compare epochs and determine reconciliation action.
|
||||
/// Compare raw epoch values and return the relationship state.
|
||||
pub fn compare_epochs(rvf_epoch: u64, metadata_epoch: u64) -> EpochState {
|
||||
if rvf_epoch == metadata_epoch {
|
||||
EpochState::Synchronized
|
||||
} else if rvf_epoch > metadata_epoch {
|
||||
EpochState::RvfAhead(rvf_epoch - metadata_epoch)
|
||||
} else {
|
||||
EpochState::MetadataAhead(metadata_epoch - rvf_epoch)
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine the reconciliation action for a given epoch state.
|
||||
pub fn reconcile_action(state: &EpochState) -> ReconcileAction {
|
||||
match state {
|
||||
EpochState::Synchronized => ReconcileAction::None,
|
||||
EpochState::RvfAhead(delta) => {
|
||||
if *delta == 1 {
|
||||
// Common case: a single write committed to RVF but metadata
|
||||
// update was lost (e.g. crash between step 1 and step 2).
|
||||
ReconcileAction::RebuildMetadata
|
||||
} else {
|
||||
// Multiple epochs behind -- still rebuild metadata, but the
|
||||
// gap is larger so more data must be replayed.
|
||||
ReconcileAction::RebuildMetadata
|
||||
}
|
||||
}
|
||||
EpochState::MetadataAhead(delta) => {
|
||||
if *delta == 1 {
|
||||
// Metadata committed but RVF write was lost. This means the
|
||||
// RVF file is still valid at its own epoch -- rebuild from it.
|
||||
ReconcileAction::RebuildFromRvf
|
||||
} else {
|
||||
// Large gap with metadata ahead is anomalous. Trust RVF.
|
||||
ReconcileAction::LogWarningTrustRvf
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare epochs and determine reconciliation action (legacy API).
|
||||
pub fn reconcile(rvf_epoch: Epoch, metadata_epoch: Epoch) -> ReconciliationAction {
|
||||
match rvf_epoch.cmp(&metadata_epoch) {
|
||||
std::cmp::Ordering::Equal => ReconciliationAction::InSync,
|
||||
|
|
@ -52,10 +121,111 @@ pub fn reconcile(rvf_epoch: Epoch, metadata_epoch: Epoch) -> ReconciliationActio
|
|||
}
|
||||
}
|
||||
|
||||
/// Thread-safe epoch tracker.
///
/// Uses `AtomicU64` internally so it can be shared across threads without
/// a mutex. `commit` only ever advances the counter by exactly one;
/// `force_set` is the recovery escape hatch and may set any value,
/// including a smaller one.
///
/// # Write protocol
///
/// Callers must follow these steps in order:
/// 1. Call `begin_write()` to get the next epoch value.
/// 2. Write vectors to RVF with that epoch.
/// 3. Write metadata to IndexedDB with that epoch.
/// 4. Call `commit(epoch)` to advance the tracker.
///
/// If step 2 or 3 fails, do NOT call `commit` -- the tracker stays at the
/// previous epoch so that the next startup triggers reconciliation.
pub struct EpochTracker {
    /// Current committed epoch.
    current: AtomicU64,
}
|
||||
|
||||
impl EpochTracker {
|
||||
/// Create a new tracker starting at the given epoch.
|
||||
pub fn new(initial: u64) -> Self {
|
||||
Self {
|
||||
current: AtomicU64::new(initial),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a tracker starting at epoch zero.
|
||||
pub fn zero() -> Self {
|
||||
Self::new(0)
|
||||
}
|
||||
|
||||
/// Read the current committed epoch.
|
||||
pub fn current(&self) -> u64 {
|
||||
self.current.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// Return the next epoch value for a pending write.
|
||||
///
|
||||
/// This does NOT advance the tracker. The caller must call `commit`
|
||||
/// after both RVF and metadata writes succeed.
|
||||
pub fn begin_write(&self) -> u64 {
|
||||
self.current.load(Ordering::Acquire).checked_add(1).expect("epoch overflow")
|
||||
}
|
||||
|
||||
/// Commit the given epoch, advancing the tracker.
|
||||
///
|
||||
/// Returns `true` if the commit succeeded (epoch was exactly current + 1).
|
||||
/// Returns `false` if the epoch was stale or out of order, which means
|
||||
/// another writer committed first or the caller passed a wrong value.
|
||||
pub fn commit(&self, epoch: u64) -> bool {
|
||||
let expected = epoch.checked_sub(1).unwrap_or(0);
|
||||
self.current
|
||||
.compare_exchange(expected, epoch, Ordering::AcqRel, Ordering::Acquire)
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
/// Force-set the epoch to a specific value.
|
||||
///
|
||||
/// Used during recovery/reconciliation when we need to align the
|
||||
/// tracker with a known-good state read from disk.
|
||||
pub fn force_set(&self, epoch: u64) {
|
||||
self.current.store(epoch, Ordering::Release);
|
||||
}
|
||||
|
||||
/// Check the relationship between the RVF epoch stored on disk and the
|
||||
/// metadata epoch, then return the appropriate reconciliation action.
|
||||
pub fn check_and_reconcile(&self, rvf_epoch: u64, metadata_epoch: u64) -> ReconcileAction {
|
||||
let state = compare_epochs(rvf_epoch, metadata_epoch);
|
||||
let action = reconcile_action(&state);
|
||||
|
||||
// After reconciliation, align the tracker to the authoritative epoch.
|
||||
match &action {
|
||||
ReconcileAction::None => {
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
ReconcileAction::RebuildMetadata | ReconcileAction::RebuildFromRvf => {
|
||||
// After rebuild, both sides will match the RVF epoch.
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
ReconcileAction::LogWarningTrustRvf => {
|
||||
// Trust RVF -- set tracker to RVF epoch.
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
}
|
||||
|
||||
action
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for EpochTracker {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("EpochTracker")
|
||||
.field("current", &self.current.load(Ordering::Relaxed))
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// ---- Legacy API tests (preserved) ----
|
||||
|
||||
#[test]
|
||||
fn in_sync() {
|
||||
let e = Epoch(5);
|
||||
|
|
@ -91,4 +261,160 @@ mod tests {
|
|||
assert_eq!(Epoch::ZERO.next(), Epoch(1));
|
||||
assert_eq!(Epoch(99).next(), Epoch(100));
|
||||
}
|
||||
|
||||
// ---- New epoch state / reconcile tests ----
|
||||
|
||||
#[test]
|
||||
fn compare_epochs_synchronized() {
|
||||
assert_eq!(compare_epochs(5, 5), EpochState::Synchronized);
|
||||
assert_eq!(compare_epochs(0, 0), EpochState::Synchronized);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compare_epochs_rvf_ahead() {
|
||||
assert_eq!(compare_epochs(10, 7), EpochState::RvfAhead(3));
|
||||
assert_eq!(compare_epochs(1, 0), EpochState::RvfAhead(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compare_epochs_metadata_ahead() {
|
||||
assert_eq!(compare_epochs(3, 8), EpochState::MetadataAhead(5));
|
||||
assert_eq!(compare_epochs(0, 1), EpochState::MetadataAhead(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reconcile_action_none_when_synchronized() {
|
||||
let state = EpochState::Synchronized;
|
||||
assert_eq!(reconcile_action(&state), ReconcileAction::None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reconcile_action_rebuild_metadata_when_rvf_ahead() {
|
||||
assert_eq!(
|
||||
reconcile_action(&EpochState::RvfAhead(1)),
|
||||
ReconcileAction::RebuildMetadata
|
||||
);
|
||||
assert_eq!(
|
||||
reconcile_action(&EpochState::RvfAhead(5)),
|
||||
ReconcileAction::RebuildMetadata
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reconcile_action_rebuild_from_rvf_when_metadata_ahead_by_one() {
|
||||
assert_eq!(
|
||||
reconcile_action(&EpochState::MetadataAhead(1)),
|
||||
ReconcileAction::RebuildFromRvf
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reconcile_action_log_warning_when_metadata_far_ahead() {
|
||||
assert_eq!(
|
||||
reconcile_action(&EpochState::MetadataAhead(3)),
|
||||
ReconcileAction::LogWarningTrustRvf
|
||||
);
|
||||
}
|
||||
|
||||
// ---- EpochTracker tests ----
|
||||
|
||||
#[test]
|
||||
fn tracker_zero_starts_at_zero() {
|
||||
let tracker = EpochTracker::zero();
|
||||
assert_eq!(tracker.current(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_new_starts_at_initial() {
|
||||
let tracker = EpochTracker::new(42);
|
||||
assert_eq!(tracker.current(), 42);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_begin_write_returns_next() {
|
||||
let tracker = EpochTracker::new(10);
|
||||
assert_eq!(tracker.begin_write(), 11);
|
||||
// begin_write is idempotent until commit
|
||||
assert_eq!(tracker.begin_write(), 11);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_commit_advances_epoch() {
|
||||
let tracker = EpochTracker::zero();
|
||||
let next = tracker.begin_write();
|
||||
assert_eq!(next, 1);
|
||||
assert!(tracker.commit(next));
|
||||
assert_eq!(tracker.current(), 1);
|
||||
|
||||
let next2 = tracker.begin_write();
|
||||
assert_eq!(next2, 2);
|
||||
assert!(tracker.commit(next2));
|
||||
assert_eq!(tracker.current(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_commit_rejects_stale_epoch() {
|
||||
let tracker = EpochTracker::new(5);
|
||||
// Try to commit epoch 3 which is behind current
|
||||
assert!(!tracker.commit(3));
|
||||
assert_eq!(tracker.current(), 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_commit_rejects_skip() {
|
||||
let tracker = EpochTracker::new(5);
|
||||
// Try to commit epoch 8, skipping 6 and 7
|
||||
assert!(!tracker.commit(8));
|
||||
assert_eq!(tracker.current(), 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_force_set() {
|
||||
let tracker = EpochTracker::new(10);
|
||||
tracker.force_set(100);
|
||||
assert_eq!(tracker.current(), 100);
|
||||
// Can also go backward with force_set (recovery scenario)
|
||||
tracker.force_set(5);
|
||||
assert_eq!(tracker.current(), 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_check_and_reconcile_in_sync() {
|
||||
let tracker = EpochTracker::zero();
|
||||
let action = tracker.check_and_reconcile(7, 7);
|
||||
assert_eq!(action, ReconcileAction::None);
|
||||
assert_eq!(tracker.current(), 7);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_check_and_reconcile_rvf_ahead() {
|
||||
let tracker = EpochTracker::zero();
|
||||
let action = tracker.check_and_reconcile(10, 8);
|
||||
assert_eq!(action, ReconcileAction::RebuildMetadata);
|
||||
assert_eq!(tracker.current(), 10);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_check_and_reconcile_metadata_far_ahead() {
|
||||
let tracker = EpochTracker::zero();
|
||||
let action = tracker.check_and_reconcile(3, 8);
|
||||
assert_eq!(action, ReconcileAction::LogWarningTrustRvf);
|
||||
assert_eq!(tracker.current(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_debug_format() {
|
||||
let tracker = EpochTracker::new(42);
|
||||
let debug = format!("{:?}", tracker);
|
||||
assert!(debug.contains("EpochTracker"));
|
||||
assert!(debug.contains("42"));
|
||||
}
|
||||
|
||||
// ---- Thread safety (basic) ----
|
||||
|
||||
#[test]
|
||||
fn tracker_is_send_and_sync() {
|
||||
fn assert_send_sync<T: Send + Sync>() {}
|
||||
assert_send_sync::<EpochTracker>();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
296
crates/rvlite/src/storage/id_map.rs
Normal file
296
crates/rvlite/src/storage/id_map.rs
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
//! Direct mapping between RVF vector IDs and SQL primary keys.
|
||||
//!
|
||||
//! In rvlite the mapping is identity: RVF u64 IDs are the same as SQL
|
||||
//! primary keys. This zero-cost design avoids an extra lookup table and
|
||||
//! keeps memory usage minimal.
|
||||
//!
|
||||
//! The [`IdMapping`] trait exists for future extensibility -- if a
|
||||
//! non-identity mapping is ever needed (e.g. hashed IDs, composite keys),
|
||||
//! a new implementation can be swapped in without changing call sites.
|
||||
|
||||
/// Trait for converting between RVF vector IDs and SQL primary keys.
///
/// Implementors define how the two ID spaces relate to each other.
/// The default implementation ([`DirectIdMap`]) uses identity mapping.
pub trait IdMapping {
    /// Convert a SQL primary key to an RVF vector ID.
    fn to_rvf_id(&self, sql_pk: u64) -> u64;

    /// Convert an RVF vector ID back to a SQL primary key.
    fn to_sql_pk(&self, rvf_id: u64) -> u64;

    /// Validate that `rvf_ids` and `sql_pks` describe the same collection of
    /// records under this mapping.
    ///
    /// Order is irrelevant, but the implementations in this module compare
    /// the slices as multisets: the lengths must match and a value that
    /// occurs `n` times on one side must map to a value that occurs `n`
    /// times on the other.
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool;
}
|
||||
|
||||
/// Identity mapping: an RVF `u64` ID and its SQL primary key are the same
/// number.
///
/// This is the default and recommended mapping for rvlite. Both ID spaces
/// are `u64`, so the conversions simply hand back their argument and the
/// type itself carries no data.
///
/// # Example
///
/// ```
/// # use rvlite::storage::id_map::{DirectIdMap, IdMapping};
/// let map = DirectIdMap;
/// assert_eq!(map.to_rvf_id(42), 42);
/// assert_eq!(map.to_sql_pk(42), 42);
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct DirectIdMap;
|
||||
|
||||
impl DirectIdMap {
|
||||
/// Create a new direct (identity) ID map.
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
/// Convert a SQL primary key to an RVF vector ID (identity).
|
||||
///
|
||||
/// This is a free function alternative to the trait method, useful when
|
||||
/// you know the concrete type and want to avoid dynamic dispatch.
|
||||
#[inline(always)]
|
||||
pub fn to_rvf_id(sql_pk: u64) -> u64 {
|
||||
sql_pk
|
||||
}
|
||||
|
||||
/// Convert an RVF vector ID to a SQL primary key (identity).
|
||||
#[inline(always)]
|
||||
pub fn to_sql_pk(rvf_id: u64) -> u64 {
|
||||
rvf_id
|
||||
}
|
||||
|
||||
/// Validate that the two slices contain the same set of IDs.
|
||||
///
|
||||
/// Under identity mapping, `rvf_ids` and `sql_pks` must be equal
|
||||
/// as sets (same elements, possibly different order).
|
||||
pub fn validate_mapping(rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
|
||||
if rvf_ids.len() != sql_pks.len() {
|
||||
return false;
|
||||
}
|
||||
let mut rvf_sorted: Vec<u64> = rvf_ids.to_vec();
|
||||
let mut sql_sorted: Vec<u64> = sql_pks.to_vec();
|
||||
rvf_sorted.sort_unstable();
|
||||
sql_sorted.sort_unstable();
|
||||
rvf_sorted == sql_sorted
|
||||
}
|
||||
}
|
||||
|
||||
impl IdMapping for DirectIdMap {
|
||||
#[inline(always)]
|
||||
fn to_rvf_id(&self, sql_pk: u64) -> u64 {
|
||||
sql_pk
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn to_sql_pk(&self, rvf_id: u64) -> u64 {
|
||||
rvf_id
|
||||
}
|
||||
|
||||
fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
|
||||
DirectIdMap::validate_mapping(rvf_ids, sql_pks)
|
||||
}
|
||||
}
|
||||
|
||||
/// ID mapping in which the two ID spaces differ by a constant offset.
///
/// Handy when the SQL table auto-increments from 1 while the RVF store is
/// zero-indexed (or the other way round).
///
/// `rvf_id = sql_pk + offset`
#[derive(Debug, Clone, Copy)]
pub struct OffsetIdMap {
    /// Signed amount added to a SQL PK to obtain the RVF ID. Negative values
    /// are supported; the conversions use wrapping arithmetic on `u64`.
    offset: i64,
}
|
||||
|
||||
impl OffsetIdMap {
|
||||
/// Create an offset mapping.
|
||||
///
|
||||
/// `offset` is added to SQL PKs to produce RVF IDs.
|
||||
/// Use a negative offset if RVF IDs are smaller than SQL PKs.
|
||||
pub fn new(offset: i64) -> Self {
|
||||
Self { offset }
|
||||
}
|
||||
}
|
||||
|
||||
impl IdMapping for OffsetIdMap {
    /// `sql_pk + offset`, wrapping around on overflow or underflow.
    #[inline]
    fn to_rvf_id(&self, sql_pk: u64) -> u64 {
        // Reinterpreting the signed offset as u64 (two's complement) and
        // wrapping-adding gives the same bits as doing the addition in i64.
        sql_pk.wrapping_add(self.offset as u64)
    }

    /// `rvf_id - offset`, wrapping around on overflow or underflow.
    #[inline]
    fn to_sql_pk(&self, rvf_id: u64) -> u64 {
        // Same two's-complement reasoning as in `to_rvf_id`.
        rvf_id.wrapping_sub(self.offset as u64)
    }

    /// True when `rvf_ids` holds exactly the offset images of `sql_pks`,
    /// compared as multisets (order ignored, multiplicities respected).
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        if rvf_ids.len() != sql_pks.len() {
            return false;
        }

        // Map the SQL side into RVF space, then compare both sides sorted.
        let mut mapped: Vec<u64> = sql_pks.iter().map(|&pk| self.to_rvf_id(pk)).collect();
        mapped.sort_unstable();

        let mut present = rvf_ids.to_vec();
        present.sort_unstable();

        mapped == present
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Unit tests for the ID mapping strategies: the identity map, the
    //! offset map (including wrap-around behaviour), and trait-object use.

    use super::*;

    // ---- DirectIdMap tests ----

    #[test]
    fn direct_to_rvf_id_is_identity() {
        assert_eq!(DirectIdMap::to_rvf_id(0), 0);
        assert_eq!(DirectIdMap::to_rvf_id(42), 42);
        assert_eq!(DirectIdMap::to_rvf_id(u64::MAX), u64::MAX);
    }

    #[test]
    fn direct_to_sql_pk_is_identity() {
        assert_eq!(DirectIdMap::to_sql_pk(0), 0);
        assert_eq!(DirectIdMap::to_sql_pk(42), 42);
        assert_eq!(DirectIdMap::to_sql_pk(u64::MAX), u64::MAX);
    }

    #[test]
    fn direct_roundtrip() {
        for id in [0, 1, 100, u64::MAX / 2, u64::MAX] {
            assert_eq!(DirectIdMap::to_sql_pk(DirectIdMap::to_rvf_id(id)), id);
            assert_eq!(DirectIdMap::to_rvf_id(DirectIdMap::to_sql_pk(id)), id);
        }
    }

    #[test]
    fn direct_validate_same_elements() {
        let rvf = vec![1, 2, 3];
        let sql = vec![3, 1, 2];
        assert!(DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_empty() {
        assert!(DirectIdMap::validate_mapping(&[], &[]));
    }

    #[test]
    fn direct_validate_different_length_fails() {
        let rvf = vec![1, 2, 3];
        let sql = vec![1, 2];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_different_elements_fails() {
        let rvf = vec![1, 2, 3];
        let sql = vec![1, 2, 4];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_duplicates_match() {
        let rvf = vec![1, 1, 2];
        let sql = vec![1, 2, 1];
        assert!(DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_duplicates_mismatch() {
        let rvf = vec![1, 1, 2];
        let sql = vec![1, 2, 2];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    // ---- IdMapping trait via DirectIdMap ----

    #[test]
    fn trait_direct_to_rvf_id() {
        let map = DirectIdMap;
        assert_eq!(IdMapping::to_rvf_id(&map, 99), 99);
    }

    #[test]
    fn trait_direct_to_sql_pk() {
        let map = DirectIdMap;
        assert_eq!(IdMapping::to_sql_pk(&map, 99), 99);
    }

    #[test]
    fn trait_direct_validate() {
        let map = DirectIdMap;
        assert!(IdMapping::validate_mapping(&map, &[1, 2], &[2, 1]));
        assert!(!IdMapping::validate_mapping(&map, &[1, 2], &[2, 3]));
    }

    // ---- OffsetIdMap tests ----

    #[test]
    fn offset_positive() {
        let map = OffsetIdMap::new(10);
        assert_eq!(map.to_rvf_id(0), 10);
        assert_eq!(map.to_rvf_id(5), 15);
        assert_eq!(map.to_sql_pk(10), 0);
        assert_eq!(map.to_sql_pk(15), 5);
    }

    #[test]
    fn offset_negative() {
        let map = OffsetIdMap::new(-1);
        // SQL PK 1 -> RVF ID 0
        assert_eq!(map.to_rvf_id(1), 0);
        assert_eq!(map.to_sql_pk(0), 1);
    }

    #[test]
    fn offset_zero_is_identity() {
        let map = OffsetIdMap::new(0);
        for id in [0, 1, 42, 1000] {
            assert_eq!(map.to_rvf_id(id), id);
            assert_eq!(map.to_sql_pk(id), id);
        }
    }

    #[test]
    fn offset_roundtrip() {
        let map = OffsetIdMap::new(7);
        for pk in [0, 1, 100, 999] {
            assert_eq!(map.to_sql_pk(map.to_rvf_id(pk)), pk);
        }
    }

    #[test]
    fn offset_negative_roundtrip() {
        // The inverse must hold for negative offsets too, including PKs
        // that are smaller than the magnitude of the offset (which wrap).
        let map = OffsetIdMap::new(-5);
        for pk in [0u64, 4, 5, 6, 100, u64::MAX] {
            assert_eq!(map.to_sql_pk(map.to_rvf_id(pk)), pk);
        }
    }

    #[test]
    fn offset_wraps_at_boundaries() {
        // Both directions use wrapping arithmetic, so crossing the u64
        // boundary yields a wrapped value rather than a panic.
        let up = OffsetIdMap::new(1);
        assert_eq!(up.to_rvf_id(u64::MAX), 0);
        assert_eq!(up.to_sql_pk(0), u64::MAX);

        let down = OffsetIdMap::new(-1);
        assert_eq!(down.to_rvf_id(0), u64::MAX);
        assert_eq!(down.to_sql_pk(u64::MAX), 0);

        let extreme = OffsetIdMap::new(i64::MIN);
        assert_eq!(extreme.to_rvf_id(0), 1u64 << 63);
        assert_eq!(extreme.to_sql_pk(1u64 << 63), 0);
    }

    #[test]
    fn offset_validate() {
        let map = OffsetIdMap::new(10);
        // SQL PKs [0, 1, 2] -> RVF IDs [10, 11, 12]
        assert!(map.validate_mapping(&[12, 10, 11], &[2, 0, 1]));
        assert!(!map.validate_mapping(&[10, 11, 12], &[0, 1, 3]));
    }

    #[test]
    fn offset_validate_length_mismatch_fails() {
        let map = OffsetIdMap::new(10);
        assert!(!map.validate_mapping(&[10, 11], &[0]));
        assert!(map.validate_mapping(&[], &[]));
    }

    // ---- Dynamic dispatch ----

    #[test]
    fn trait_object_works() {
        let direct: Box<dyn IdMapping> = Box::new(DirectIdMap);
        assert_eq!(direct.to_rvf_id(5), 5);

        let offset: Box<dyn IdMapping> = Box::new(OffsetIdMap::new(100));
        assert_eq!(offset.to_rvf_id(5), 105);
    }

    // ---- Default impl ----

    #[test]
    fn direct_default() {
        let map: DirectIdMap = Default::default();
        assert_eq!(map.to_rvf_id(7), 7);
    }
}
|
||||
|
|
@ -11,5 +11,11 @@ pub mod state;
|
|||
#[cfg(feature = "rvf-backend")]
|
||||
pub mod epoch;
|
||||
|
||||
#[cfg(feature = "rvf-backend")]
|
||||
pub mod writer_lease;
|
||||
|
||||
#[cfg(feature = "rvf-backend")]
|
||||
pub mod id_map;
|
||||
|
||||
pub use indexeddb::IndexedDBStorage;
|
||||
pub use state::{GraphState, RvLiteState, TripleStoreState, VectorState};
|
||||
|
|
|
|||
543
crates/rvlite/src/storage/writer_lease.rs
Normal file
543
crates/rvlite/src/storage/writer_lease.rs
Normal file
|
|
@ -0,0 +1,543 @@
|
|||
//! File-based writer lease for single-writer concurrency in rvlite.
|
||||
//!
|
||||
//! Provides a cooperative lock mechanism using a lock file with PID and
|
||||
//! timestamp. Only one writer may hold the lease at a time. The lease
|
||||
//! includes a heartbeat timestamp that is checked for staleness so that
|
||||
//! crashed processes do not permanently block new writers.
|
||||
//!
|
||||
//! Lock file location: `{store_path}.lock`
|
||||
//! Lock file contents: JSON with `pid`, `timestamp_secs`, `hostname`.
|
||||
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// How long a heartbeat may go unrefreshed before the lease is treated as
/// abandoned and becomes eligible for forced takeover by another writer.
const DEFAULT_STALE_THRESHOLD: Duration = Duration::from_secs(30);
|
||||
|
||||
/// Contents written to the lock file.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct LeaseMeta {
|
||||
/// Process ID of the lock holder.
|
||||
pid: u32,
|
||||
/// Unix timestamp in seconds when the lease was last refreshed.
|
||||
timestamp_secs: u64,
|
||||
/// Hostname of the lock holder.
|
||||
hostname: String,
|
||||
}
|
||||
|
||||
/// A writer lease backed by a lock file on disk.
///
/// While this struct is alive, the lease is held. Dropping it releases
/// the lock file automatically via the `Drop` implementation.
///
/// # Example
///
/// The snippet is marked `ignore` because the module is only compiled
/// with the `rvf-backend` feature.
///
/// ```ignore
/// use std::path::Path;
/// use std::time::Duration;
///
/// // `release` takes `&mut self`, so the binding must be mutable.
/// let mut lease = WriterLease::acquire(Path::new("/data/store.rvf"), Duration::from_secs(5))?;
/// // ... perform writes; call `lease.refresh_heartbeat()?` during long operations ...
/// lease.release()?; // or just let it drop
/// ```
pub struct WriterLease {
    /// Path to the lock file.
    lock_path: PathBuf,
    /// Our PID, used to verify ownership on release.
    pid: u32,
    /// Whether the lease has been explicitly released.
    released: bool,
}
|
||||
|
||||
impl WriterLease {
|
||||
/// Attempt to acquire the writer lease for the given store path.
|
||||
///
|
||||
/// The lock file is created at `{path}.lock`. If another process holds
|
||||
/// the lease, this function will retry until `timeout` elapses. If the
|
||||
/// existing lease is stale (heartbeat older than 30 seconds and the
|
||||
/// holder PID is not alive), the stale lock is broken and acquisition
|
||||
/// proceeds.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `io::Error` with `WouldBlock` if the timeout expires without
|
||||
/// acquiring the lease, or propagates any underlying I/O errors.
|
||||
pub fn acquire(path: &Path, timeout: Duration) -> io::Result<Self> {
|
||||
let lock_path = lock_path_for(path);
|
||||
let pid = std::process::id();
|
||||
let deadline = Instant::now() + timeout;
|
||||
|
||||
loop {
|
||||
// Try to create the lock file exclusively.
|
||||
match try_create_lock(&lock_path, pid) {
|
||||
Ok(()) => {
|
||||
return Ok(WriterLease {
|
||||
lock_path,
|
||||
pid,
|
||||
released: false,
|
||||
});
|
||||
}
|
||||
Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
|
||||
// Lock file exists -- check if it is stale.
|
||||
if Self::is_stale(&lock_path, DEFAULT_STALE_THRESHOLD) {
|
||||
// Force-remove the stale lock and retry.
|
||||
let _ = fs::remove_file(&lock_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Lock is active. Check timeout.
|
||||
if Instant::now() >= deadline {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::WouldBlock,
|
||||
format!(
|
||||
"writer lease acquisition timed out after {:?} for {:?}",
|
||||
timeout, lock_path
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
// Brief sleep before retrying.
|
||||
std::thread::sleep(Duration::from_millis(50));
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Explicitly release the writer lease.
|
||||
///
|
||||
/// Verifies that the lock file still belongs to this process before
|
||||
/// removing it to avoid deleting a lock acquired by another process
|
||||
/// after a stale break.
|
||||
pub fn release(&mut self) -> io::Result<()> {
|
||||
if self.released {
|
||||
return Ok(());
|
||||
}
|
||||
self.do_release();
|
||||
self.released = true;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Refresh the heartbeat timestamp in the lock file.
|
||||
///
|
||||
/// Writers performing long operations should call this periodically
|
||||
/// (e.g. every 10 seconds) to prevent the lease from appearing stale.
|
||||
pub fn refresh_heartbeat(&self) -> io::Result<()> {
|
||||
if self.released {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"cannot refresh a released lease",
|
||||
));
|
||||
}
|
||||
// Verify we still own the lock.
|
||||
if !self.owns_lock() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"lease was taken over by another process",
|
||||
));
|
||||
}
|
||||
write_lock_file(&self.lock_path, self.pid)
|
||||
}
|
||||
|
||||
/// Check whether the lock file at the given path is stale.
|
||||
///
|
||||
/// A lock is stale if:
|
||||
/// - The lock file does not exist (vacuously stale).
|
||||
/// - The lock file cannot be parsed.
|
||||
/// - The heartbeat timestamp is older than `threshold`.
|
||||
/// - The PID in the lock file is not alive on the current host.
|
||||
pub fn is_stale(path: &Path, threshold: Duration) -> bool {
|
||||
let lock_path = if path.extension().map_or(false, |e| e == "lock") {
|
||||
path.to_path_buf()
|
||||
} else {
|
||||
lock_path_for(path)
|
||||
};
|
||||
|
||||
let content = match fs::read_to_string(&lock_path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return true, // Missing or unreadable = stale.
|
||||
};
|
||||
|
||||
let meta: LeaseMeta = match serde_json::from_str(&content) {
|
||||
Ok(m) => m,
|
||||
Err(_) => return true, // Corrupt = stale.
|
||||
};
|
||||
|
||||
// Check age.
|
||||
let now_secs = current_unix_secs();
|
||||
let age_secs = now_secs.saturating_sub(meta.timestamp_secs);
|
||||
if age_secs > threshold.as_secs() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if PID is alive (only meaningful on same host).
|
||||
let our_hostname = get_hostname();
|
||||
if meta.hostname == our_hostname && !is_pid_alive(meta.pid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Return the path to the lock file.
|
||||
pub fn lock_path(&self) -> &Path {
|
||||
&self.lock_path
|
||||
}
|
||||
|
||||
/// Check whether this lease still owns the lock file.
|
||||
fn owns_lock(&self) -> bool {
|
||||
let content = match fs::read_to_string(&self.lock_path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return false,
|
||||
};
|
||||
let meta: LeaseMeta = match serde_json::from_str(&content) {
|
||||
Ok(m) => m,
|
||||
Err(_) => return false,
|
||||
};
|
||||
meta.pid == self.pid
|
||||
}
|
||||
|
||||
/// Internal release logic.
|
||||
fn do_release(&self) {
|
||||
if self.owns_lock() {
|
||||
let _ = fs::remove_file(&self.lock_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for WriterLease {
|
||||
fn drop(&mut self) {
|
||||
if !self.released {
|
||||
self.do_release();
|
||||
self.released = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for WriterLease {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("WriterLease")
|
||||
.field("lock_path", &self.lock_path)
|
||||
.field("pid", &self.pid)
|
||||
.field("released", &self.released)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Helper functions ----
|
||||
|
||||
/// Derive the lock file path by appending `.lock` to the full store path
/// (the existing extension, if any, is kept).
fn lock_path_for(store_path: &Path) -> PathBuf {
    let mut raw = store_path.as_os_str().to_owned();
    raw.push(".lock");
    raw.into()
}
|
||||
|
||||
/// Try to atomically create the lock file. Fails with `AlreadyExists` if
|
||||
/// another process holds the lock.
|
||||
fn try_create_lock(lock_path: &Path, pid: u32) -> io::Result<()> {
|
||||
// Ensure parent directory exists.
|
||||
if let Some(parent) = lock_path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
|
||||
// Use create_new for O_CREAT | O_EXCL semantics.
|
||||
let meta = LeaseMeta {
|
||||
pid,
|
||||
timestamp_secs: current_unix_secs(),
|
||||
hostname: get_hostname(),
|
||||
};
|
||||
let content = serde_json::to_string(&meta).map_err(|e| {
|
||||
io::Error::new(io::ErrorKind::Other, format!("serialize lease meta: {e}"))
|
||||
})?;
|
||||
|
||||
let mut file = fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(lock_path)?;
|
||||
file.write_all(content.as_bytes())?;
|
||||
file.sync_all()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Overwrite an existing lock file with a fresh timestamp.
|
||||
fn write_lock_file(lock_path: &Path, pid: u32) -> io::Result<()> {
|
||||
let meta = LeaseMeta {
|
||||
pid,
|
||||
timestamp_secs: current_unix_secs(),
|
||||
hostname: get_hostname(),
|
||||
};
|
||||
let content = serde_json::to_string(&meta).map_err(|e| {
|
||||
io::Error::new(io::ErrorKind::Other, format!("serialize lease meta: {e}"))
|
||||
})?;
|
||||
fs::write(lock_path, content.as_bytes())
|
||||
}
|
||||
|
||||
/// Whole seconds elapsed since the Unix epoch according to the system
/// clock, or `0` if the clock reports a time before the epoch.
fn current_unix_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
||||
/// Best-effort hostname retrieval.
///
/// Sources are tried in order and the first one that yields a non-empty
/// value (after trimming whitespace) wins:
/// 1. the `HOSTNAME` environment variable,
/// 2. the contents of `/etc/hostname`,
/// 3. the `COMPUTERNAME` environment variable.
///
/// Falls back to the literal `"unknown"` so the result is never empty.
fn get_hostname() -> String {
    // Trim a candidate and discard it if nothing is left.
    fn non_empty(raw: String) -> Option<String> {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            None
        } else {
            Some(trimmed.to_string())
        }
    }

    std::env::var("HOSTNAME")
        .ok()
        .and_then(non_empty)
        .or_else(|| fs::read_to_string("/etc/hostname").ok().and_then(non_empty))
        .or_else(|| std::env::var("COMPUTERNAME").ok().and_then(non_empty))
        .unwrap_or_else(|| "unknown".to_string())
}
|
||||
|
||||
/// Check whether a process with the given PID is alive.
///
/// Uses `kill(pid, 0)`, which performs the existence and permission checks
/// without delivering a signal. PIDs that are `0` or do not fit in a
/// positive `i32` are reported as not alive: passed to `kill` they would
/// address a process group (or every process) rather than one process.
#[cfg(unix)]
fn is_pid_alive(pid: u32) -> bool {
    /// `errno` value meaning the process exists but we may not signal it.
    const EPERM: i32 = 1;

    if pid == 0 || pid > i32::MAX as u32 {
        return false;
    }

    // SAFETY: `kill` takes plain integers and has no memory-safety
    // preconditions; signal 0 only checks for existence.
    if unsafe { kill(pid as i32, 0) } == 0 {
        return true;
    }

    // Read errno through std so this works on every Unix, not only on
    // libcs that export `__errno_location`.
    io::Error::last_os_error().raw_os_error() == Some(EPERM)
}

/// Check whether a process with the given PID is alive.
///
/// There is no portable liveness probe here, so conservatively assume the
/// process is alive on non-Unix targets.
#[cfg(not(unix))]
fn is_pid_alive(pid: u32) -> bool {
    let _ = pid;
    true
}

#[cfg(unix)]
extern "C" {
    /// POSIX `kill(2)`; provided by the platform C library that std links.
    fn kill(pid: i32, sig: i32) -> i32;
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Unit tests for the file-based writer lease. Every test works in its
    //! own temporary directory so the suite can run in parallel.

    use super::*;
    use std::fs;
    use std::sync::atomic::{AtomicU64, Ordering as AtomicOrdering};

    /// Counter to generate unique directory names for each test, avoiding
    /// cross-test interference when running in parallel.
    static TEST_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Create (best effort) and return a fresh directory under the system
    /// temp dir, unique per process and per call.
    fn unique_dir(name: &str) -> PathBuf {
        let id = TEST_COUNTER.fetch_add(1, AtomicOrdering::Relaxed);
        let dir = std::env::temp_dir().join(format!(
            "rvlite_lease_{}_{}_{}",
            std::process::id(),
            id,
            name
        ));
        let _ = fs::create_dir_all(&dir);
        dir
    }

    /// Remove a test directory and everything in it, ignoring errors.
    fn cleanup(dir: &Path) {
        let _ = fs::remove_dir_all(dir);
    }

    #[test]
    fn lock_path_computation() {
        let p = Path::new("/tmp/store.rvf");
        assert_eq!(lock_path_for(p), PathBuf::from("/tmp/store.rvf.lock"));
    }

    #[test]
    fn acquire_and_release() {
        let dir = unique_dir("acquire_release");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert!(lease.lock_path().exists());

        lease.release().unwrap();
        assert!(!lease.lock_path().exists());

        cleanup(&dir);
    }

    #[test]
    fn double_acquire_fails_within_timeout() {
        let dir = unique_dir("double_acquire");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        // Second acquire should time out quickly. The lock is held by our own
        // PID and is fresh, so it cannot be broken as stale.
        let result = WriterLease::acquire(&store_path, Duration::from_millis(150));
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), io::ErrorKind::WouldBlock);

        cleanup(&dir);
    }

    #[test]
    fn drop_releases_lease() {
        let dir = unique_dir("drop_release");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lock_file = lock_path_for(&store_path);

        {
            let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
            assert!(lock_file.exists());
        }
        // After drop, lock file should be gone.
        assert!(!lock_file.exists());

        cleanup(&dir);
    }

    #[test]
    fn stale_lease_is_detected() {
        let dir = unique_dir("stale_detect");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_path = lock_path_for(&store_path);

        // Write a lock file with a very old timestamp and dead PID.
        let meta = LeaseMeta {
            pid: 999_999_999, // Almost certainly not alive.
            timestamp_secs: current_unix_secs().saturating_sub(120),
            hostname: get_hostname(),
        };
        let content = serde_json::to_string(&meta).unwrap();
        fs::write(&lock_path, content).unwrap();

        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn fresh_lease_is_not_stale() {
        let dir = unique_dir("fresh_lease");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        assert!(!WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn is_stale_accepts_lock_path_directly() {
        let dir = unique_dir("lock_path_direct");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        // Passing the `.lock` file itself must inspect that same file
        // rather than looking for `*.lock.lock`.
        let lock_path = lock_path_for(&store_path);
        assert!(!WriterLease::is_stale(&lock_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn missing_lock_file_is_stale() {
        let dir = unique_dir("missing");
        let store_path = dir.join("never_created.rvf");
        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn corrupt_lock_file_is_stale() {
        let dir = unique_dir("corrupt");
        let store_path = dir.join("test.rvf");
        let lock_path = lock_path_for(&store_path);

        let _ = fs::create_dir_all(&dir);
        fs::write(&lock_path, b"not json").unwrap();
        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn refresh_heartbeat_updates_timestamp() {
        let dir = unique_dir("heartbeat");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        // refresh_heartbeat overwrites the lock file with a new timestamp.
        lease.refresh_heartbeat().unwrap();

        // Read back and verify timestamp is recent.
        let content = fs::read_to_string(lease.lock_path()).unwrap();
        let meta: LeaseMeta = serde_json::from_str(&content).unwrap();
        let age = current_unix_secs().saturating_sub(meta.timestamp_secs);
        assert!(age < 5, "heartbeat should be very recent, got age={age}s");

        cleanup(&dir);
    }

    #[test]
    fn refresh_after_release_fails() {
        let dir = unique_dir("refresh_released");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        lease.release().unwrap();
        assert!(lease.refresh_heartbeat().is_err());

        cleanup(&dir);
    }

    #[test]
    fn stale_lease_force_acquire() {
        let dir = unique_dir("force_acquire");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_path = lock_path_for(&store_path);

        // Simulate a stale lock from a dead process.
        let meta = LeaseMeta {
            pid: 999_999_999,
            timestamp_secs: current_unix_secs().saturating_sub(60),
            hostname: get_hostname(),
        };
        fs::write(&lock_path, serde_json::to_string(&meta).unwrap()).unwrap();

        // Should succeed because the existing lock is stale.
        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert_eq!(lease.pid, std::process::id());

        lease.release().unwrap();
        cleanup(&dir);
    }

    #[test]
    fn release_does_not_remove_foreign_lock() {
        let dir = unique_dir("foreign_lock");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        // Simulate another process having taken over the lock file.
        let foreign = LeaseMeta {
            pid: lease.pid.wrapping_add(1),
            timestamp_secs: current_unix_secs(),
            hostname: get_hostname(),
        };
        fs::write(lease.lock_path(), serde_json::to_string(&foreign).unwrap()).unwrap();

        // Releasing must leave the other holder's lock file in place.
        lease.release().unwrap();
        assert!(lease.lock_path().exists());

        cleanup(&dir);
    }

    #[test]
    fn release_is_idempotent() {
        let dir = unique_dir("idempotent");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        lease.release().unwrap();
        // Second release should be a no-op.
        lease.release().unwrap();

        cleanup(&dir);
    }

    #[test]
    fn debug_format() {
        let dir = unique_dir("debug_fmt");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        let debug = format!("{:?}", lease);
        assert!(debug.contains("WriterLease"));
        assert!(debug.contains("lock_path"));

        cleanup(&dir);
    }
}
|
||||
|
|
@ -275,27 +275,34 @@ Integrate `@ruvector/rvf` (and its WASM backend) into both packages in three pha
|
|||
|
||||
### npx ruvector (Phase 1)
|
||||
|
||||
- [ ] Add backend adapter matching existing core interface exactly
|
||||
- [ ] Add `rvf` CLI group with create, ingest, query, status, segments, derive, compact, export
|
||||
- [ ] Add hooks `--backend rvf` flag requiring explicit selection (no silent fallback)
|
||||
- [ ] Smoke test: create, ingest, query, restart process, query again -- same results
|
||||
- [ ] Error messages for missing `@ruvector/rvf` include install command
|
||||
- [x] Add backend adapter matching existing core interface exactly
|
||||
- [x] Add `rvf` CLI group with create, ingest, query, status, segments, derive, compact, export
|
||||
- [x] Add `rvf examples` and `rvf download` commands for example .rvf files
|
||||
- [x] Add 10 RVF tools to main MCP server (rvf_create through rvf_examples)
|
||||
- [x] Add hooks `--backend rvf` flag requiring explicit selection (no silent fallback)
|
||||
- [x] Error messages for missing `@ruvector/rvf` include install command
|
||||
- [x] Security: path validation, shell arg sanitization, redirect whitelist
|
||||
- [x] Smoke test: 4 Rust integration tests (full lifecycle, cosine, multi-restart, metadata)
|
||||
|
||||
### rvlite (Phase 2)
|
||||
|
||||
- [ ] Feature-flag RVF backend in Rust; default stays unchanged
|
||||
- [ ] Define and implement epoch reconciliation algorithm
|
||||
- [ ] Add `rvf-migrate` command with `--dry-run` and `--verify` modes
|
||||
- [ ] Add `rvf-rebuild` command to reconstruct metadata from RVF
|
||||
- [ ] Writer lease implementation (file lock on Node, heartbeat on browser)
|
||||
- [ ] Direct ID mapping: RVF vector IDs = SQL primary keys (no mapping layer)
|
||||
- [x] Feature-flag RVF backend in Rust; default stays unchanged
|
||||
- [x] Epoch reconciliation module (`crates/rvlite/src/storage/epoch.rs`)
|
||||
- [x] Auto-detection of `@ruvector/rvf-wasm` in TypeScript SDK
|
||||
- [x] `getStorageBackend()` and `isRvfAvailable()` exports
|
||||
- [x] Security: Cypher injection prevention, relation type validation, depth clamping
|
||||
- [x] Full epoch reconciliation algorithm (23 tests, `EpochTracker` with `AtomicU64`, thread-safe)
|
||||
- [x] `rvf-migrate` CLI command with `--dry-run` and `--verify` modes (idempotent, 1e-6 tolerance)
|
||||
- [x] `rvf-rebuild` CLI command to reconstruct metadata from RVF
|
||||
- [x] Writer lease (`WriterLease` with file lock + PID-based stale detection, `BrowserWriterLease` with IndexedDB heartbeat)
|
||||
- [x] Direct ID mapping: `IdMapping` trait, `DirectIdMap` (identity), `OffsetIdMap` (20 tests)
|
||||
|
||||
### Shared (Phase 3)
|
||||
|
||||
- [ ] Both packages import same WASM module entry point
|
||||
- [ ] CI build step fails if two copies of WASM artifact are present
|
||||
- [ ] MCP server rvlite tools are read-only by default, write requires flag
|
||||
- [ ] Cross-platform compatibility test: WASM write -> Node read -> WASM read
|
||||
- [x] `@ruvector/rvf-wasm` as shared optional peer dependency in rvlite
|
||||
- [x] CI build step (`wasm-dedup-check.yml`) fails if duplicate WASM artifacts detected
|
||||
- [x] 3 MCP server rvlite tools (`rvlite_sql`, `rvlite_cypher`, `rvlite_sparql`) — read-only default
|
||||
- [x] Cross-platform compatibility tests: 6 tests (cosine/L2/IP round-trip, segment preservation, byte-identical transfer)
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -343,6 +350,51 @@ A clean machine with no prior data can:
|
|||
|
||||
---
|
||||
|
||||
## Security Hardening (Phase 1 Addendum)
|
||||
|
||||
Applied security hardening across all three integration surfaces after audit.
|
||||
|
||||
### Vulnerabilities Addressed
|
||||
|
||||
| ID | Severity | Surface | Vulnerability | Fix |
|
||||
|----|----------|---------|---------------|-----|
|
||||
| S-01 | CRITICAL | CLI `rvf download` | Path traversal via crafted filenames | `sanitizeFileName()` + allowlist validation + path containment check |
|
||||
| S-02 | CRITICAL | MCP server | Command injection via `execSync` with user args | `sanitizeShellArg()` strips shell metacharacters; numeric args parsed with `parseInt()` |
|
||||
| S-03 | HIGH | MCP `rvf_*` tools | Path traversal via `args.path` | `validateRvfPath()` blocks `..`, null bytes, sensitive system paths |
|
||||
| S-04 | HIGH | CLI `rvf download` | SSRF via blind redirect following | `ALLOWED_REDIRECT_HOSTS` whitelist (GitHub domains only) |
|
||||
| S-05 | HIGH | CLI `rvf download` | URL injection | `encodeURIComponent()` on filenames in URLs |
|
||||
| S-06 | MEDIUM | rvlite `SemanticMemory` | Cypher injection via unsanitized user strings | `sanitizeCypher()` escapes quotes/backslashes/control chars |
|
||||
| S-07 | MEDIUM | rvlite `SemanticMemory` | Arbitrary relationship types in Cypher | `validateRelationType()` restricts to `[A-Za-z_][A-Za-z0-9_]*` |
|
||||
| S-08 | MEDIUM | MCP server hooks | Numeric arg injection | All numeric args (`threshold`, `top_k`, `days`, etc.) parsed with `parseInt()` + fallback defaults |
|
||||
| S-09 | MEDIUM | rvlite `SemanticMemory` | Graph traversal depth abuse | `findRelated()` depth clamped to `[1, 10]` |
|
||||
|
||||
### Security Helpers Added
|
||||
|
||||
**`mcp-server.js`** (3 functions):
|
||||
- `validateRvfPath(filePath)` -- blocks path traversal, null bytes, and sensitive system paths
|
||||
- `sanitizeShellArg(arg)` -- strips shell metacharacters (`` ` ``, `$()`, `{}`, `|`, `;`, `&`, `<>`, `!`, `..`)
|
||||
- Numeric args validated with `parseInt()` in all 15+ command handlers
|
||||
|
||||
**`cli.js`** (download command):
|
||||
- `sanitizeFileName(name)` -- strips path separators, validates `/^[\w\-.]+$/`
|
||||
- `ALLOWED_REDIRECT_HOSTS` -- whitelist: `raw.githubusercontent.com`, `objects.githubusercontent.com`, `github.com`
|
||||
- Path containment: `path.resolve(dest).startsWith(path.resolve(outDir))`
|
||||
- Allowlist: downloads validated against known `RVF_EXAMPLES` catalog
|
||||
|
||||
**`rvlite/src/index.ts`**:
|
||||
- `sanitizeCypher(value)` -- escapes `\`, `"`, `'`, control characters
|
||||
- `validateRelationType(rel)` -- validates `[A-Za-z_][A-Za-z0-9_]*`
|
||||
|
||||
### Files Modified
|
||||
|
||||
| File | Change |
|
||||
|------|--------|
|
||||
| `npm/packages/ruvector/bin/cli.js` | +25 lines: filename sanitization, redirect validation, path containment, allowlist |
|
||||
| `npm/packages/ruvector/bin/mcp-server.js` | +40 lines: `validateRvfPath()`, `sanitizeShellArg()`, applied to all 25+ handlers |
|
||||
| `npm/packages/rvlite/src/index.ts` | +20 lines: `sanitizeCypher()`, `validateRelationType()`, depth clamping |
|
||||
|
||||
---
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
|
|
@ -354,6 +406,11 @@ npx ruvector rvf status test.rvf
|
|||
npx ruvector hooks remember --backend rvf --store hooks.rvf "test pattern"
|
||||
npx ruvector hooks recall --backend rvf --store hooks.rvf "test"
|
||||
|
||||
# Phase 1: Example download
|
||||
npx ruvector rvf examples
|
||||
npx ruvector rvf download basic_store agent_memory
|
||||
npx ruvector rvf download --all -o ./rvf-examples
|
||||
|
||||
# Phase 2: rvlite RVF backend
|
||||
cargo test -p rvlite --features rvf-backend
|
||||
# npm test for rvlite with RVF factory
|
||||
|
|
|
|||
|
|
@ -1940,6 +1940,9 @@ npm test
|
|||
- **[ruvector-core](https://www.npmjs.com/package/ruvector-core)** - Core native bindings (lower-level API)
|
||||
- **[ruvector-wasm](https://www.npmjs.com/package/ruvector-wasm)** - WebAssembly implementation for browsers
|
||||
- **[ruvector-cli](https://www.npmjs.com/package/ruvector-cli)** - Standalone CLI tools
|
||||
- **[@ruvector/rvf](https://www.npmjs.com/package/@ruvector/rvf)** - RVF cognitive container SDK
|
||||
- **[@ruvector/rvf-wasm](https://www.npmjs.com/package/@ruvector/rvf-wasm)** - RVF WASM build for browsers, Deno, and edge
|
||||
- **[rvlite](https://www.npmjs.com/package/rvlite)** - Lightweight vector database with SQL, SPARQL, and Cypher
|
||||
|
||||
### Platform-Specific Packages (auto-installed)
|
||||
|
||||
|
|
@ -1949,6 +1952,93 @@ npm test
|
|||
- **[ruvector-core-darwin-arm64](https://www.npmjs.com/package/ruvector-core-darwin-arm64)**
|
||||
- **[ruvector-core-win32-x64-msvc](https://www.npmjs.com/package/ruvector-core-win32-x64-msvc)**
|
||||
|
||||
---
|
||||
|
||||
## RVF Cognitive Containers
|
||||
|
||||
Ruvector integrates with [RVF (RuVector Format)](https://github.com/ruvnet/ruvector/tree/main/crates/rvf) — a universal binary substrate that stores vectors, models, graphs, compute kernels, and attestation in a single `.rvf` file.
|
||||
|
||||
### Enable RVF Backend
|
||||
|
||||
```bash
|
||||
# Install the optional RVF package
|
||||
npm install @ruvector/rvf
|
||||
|
||||
# Set backend via environment variable
|
||||
export RUVECTOR_BACKEND=rvf
|
||||
|
||||
# Or detect automatically (native -> rvf -> wasm fallback)
|
||||
npx ruvector info
|
||||
```
|
||||
|
||||
```typescript
|
||||
import { getImplementationType, isRvf } from 'ruvector';
|
||||
|
||||
console.log(getImplementationType()); // 'native' | 'rvf' | 'wasm'
|
||||
console.log(isRvf()); // true if RVF backend is active
|
||||
```
|
||||
|
||||
### RVF CLI Commands
|
||||
|
||||
Eight RVF-specific subcommands are available through the ruvector CLI:
|
||||
|
||||
```bash
|
||||
# Create an RVF store
|
||||
npx ruvector rvf create mydb.rvf -d 384 --metric cosine
|
||||
|
||||
# Ingest vectors from JSON
|
||||
npx ruvector rvf ingest mydb.rvf --input vectors.json --format json
|
||||
|
||||
# Query nearest neighbors
|
||||
npx ruvector rvf query mydb.rvf --vector "[0.1,0.2,...]" --k 10
|
||||
|
||||
# File status and segment listing
|
||||
npx ruvector rvf status mydb.rvf
|
||||
npx ruvector rvf segments mydb.rvf
|
||||
|
||||
# COW branching — derive a child file
|
||||
npx ruvector rvf derive mydb.rvf --output child.rvf
|
||||
|
||||
# Compact and reclaim space
|
||||
npx ruvector rvf compact mydb.rvf
|
||||
|
||||
# Export to JSON
|
||||
npx ruvector rvf export mydb.rvf --output dump.json
|
||||
```
|
||||
|
||||
### RVF Platform Support
|
||||
|
||||
| Platform | Runtime | Backend |
|
||||
|----------|---------|---------|
|
||||
| Linux x86_64 / aarch64 | Node.js 18+ | Native (N-API) |
|
||||
| macOS x86_64 / arm64 | Node.js 18+ | Native (N-API) |
|
||||
| Windows x86_64 | Node.js 18+ | Native (N-API) |
|
||||
| Any | Deno | WASM (`@ruvector/rvf-wasm`) |
|
||||
| Any | Browser | WASM (`@ruvector/rvf-wasm`) |
|
||||
| Any | Cloudflare Workers | WASM (`@ruvector/rvf-wasm`) |
|
||||
|
||||
### Download Example .rvf Files
|
||||
|
||||
45 pre-built example files are available (~11 MB total):
|
||||
|
||||
```bash
|
||||
# Download a specific example
|
||||
curl -LO https://raw.githubusercontent.com/ruvnet/ruvector/main/examples/rvf/output/basic_store.rvf
|
||||
|
||||
# Popular examples:
|
||||
# basic_store.rvf (152 KB) — 1,000 vectors, dim 128
|
||||
# semantic_search.rvf (755 KB) — Semantic search with HNSW
|
||||
# rag_pipeline.rvf (303 KB) — RAG pipeline embeddings
|
||||
# agent_memory.rvf (32 KB) — AI agent memory store
|
||||
# self_booting.rvf (31 KB) — Self-booting with kernel
|
||||
# progressive_index.rvf (2.5 MB) — Large-scale HNSW index
|
||||
|
||||
# Generate all examples locally
|
||||
cd crates/rvf && cargo run --example generate_all
|
||||
```
|
||||
|
||||
Full catalog: [examples/rvf/output/](https://github.com/ruvnet/ruvector/tree/main/examples/rvf/output)
|
||||
|
||||
## 🐛 Troubleshooting
|
||||
|
||||
### Native Module Not Loading
|
||||
|
|
|
|||
|
|
@ -7120,6 +7120,167 @@ rvfCmd.command('export <path>')
|
|||
} catch (e) { console.error(chalk.red(e.message)); process.exit(1); }
|
||||
});
|
||||
|
||||
// RVF example download/list commands
|
||||
// Catalog of the pre-built example .rvf files, one row per file:
// [name (file stem, without ".rvf"), human-readable size, short description].
// The rows are expanded into { name, size, desc } objects below.
const RVF_EXAMPLES = [
  ['basic_store', '152 KB', '1,000 vectors, dim 128, cosine metric'],
  ['semantic_search', '755 KB', 'Semantic search with HNSW index'],
  ['rag_pipeline', '303 KB', 'RAG pipeline with embeddings'],
  ['embedding_cache', '755 KB', 'Cached embedding store'],
  ['quantization', '1.5 MB', 'PQ-compressed vectors'],
  ['progressive_index', '2.5 MB', 'Large-scale progressive HNSW index'],
  ['filtered_search', '255 KB', 'Metadata-filtered vector search'],
  ['recommendation', '102 KB', 'Recommendation engine vectors'],
  ['agent_memory', '32 KB', 'AI agent episodic memory'],
  ['swarm_knowledge', '86 KB', 'Multi-agent shared knowledge base'],
  ['experience_replay', '27 KB', 'RL experience replay buffer'],
  ['tool_cache', '26 KB', 'MCP tool call cache'],
  ['mcp_in_rvf', '32 KB', 'MCP server embedded in RVF'],
  ['ruvbot', '51 KB', 'Chatbot knowledge store'],
  ['claude_code_appliance', '17 KB', 'Claude Code cognitive appliance'],
  ['lineage_parent', '52 KB', 'COW parent file'],
  ['lineage_child', '26 KB', 'COW child (derived) file'],
  ['self_booting', '31 KB', 'Self-booting with KERNEL_SEG'],
  ['linux_microkernel', '15 KB', 'Embedded Linux microkernel'],
  ['ebpf_accelerator', '153 KB', 'eBPF distance accelerator'],
  ['browser_wasm', '14 KB', 'Browser WASM module embedded'],
  ['tee_attestation', '102 KB', 'TEE attestation with witnesses'],
  ['zero_knowledge', '52 KB', 'ZK-proof witness chain'],
  ['sealed_engine', '208 KB', 'Sealed inference engine'],
  ['access_control', '77 KB', 'Permission-gated vectors'],
  ['financial_signals', '202 KB', 'Financial signal vectors'],
  ['medical_imaging', '302 KB', 'Medical imaging embeddings'],
  ['legal_discovery', '903 KB', 'Legal document discovery'],
  ['multimodal_fusion', '804 KB', 'Multi-modal embedding fusion'],
  ['hyperbolic_taxonomy', '23 KB', 'Hyperbolic space taxonomy'],
  ['network_telemetry', '16 KB', 'Network telemetry vectors'],
  ['postgres_bridge', '152 KB', 'PostgreSQL bridge vectors'],
  ['ruvllm_inference', '133 KB', 'RuvLLM inference cache'],
  ['serverless', '509 KB', 'Serverless deployment bundle'],
  ['edge_iot', '27 KB', 'Edge/IoT lightweight store'],
  ['dedup_detector', '153 KB', 'Deduplication detector'],
  ['compacted', '77 KB', 'Post-compaction example'],
  ['posix_fileops', '52 KB', 'POSIX file operations test'],
  ['network_sync_a', '52 KB', 'Network sync peer A'],
  ['network_sync_b', '52 KB', 'Network sync peer B'],
  ['agent_handoff_a', '31 KB', 'Agent handoff source'],
  ['agent_handoff_b', '11 KB', 'Agent handoff target'],
  ['reasoning_parent', '5.6 KB', 'Reasoning chain parent'],
  ['reasoning_child', '8.1 KB', 'Reasoning chain child'],
  ['reasoning_grandchild', '162 B', 'Minimal derived file'],
].map(([name, size, desc]) => ({ name, size, desc }));

// Directory on GitHub from which "<name>.rvf" is fetched.
const RVF_BASE_URL = 'https://raw.githubusercontent.com/ruvnet/ruvector/main/examples/rvf/output';
|
||||
|
||||
// `rvf examples`: print the catalog of downloadable example files, either as
// raw JSON (--json) or as an aligned, colorized table.
rvfCmd.command('examples')
  .description('List available example .rvf files')
  .option('--json', 'Output as JSON')
  .action((opts) => {
    if (opts.json) {
      console.log(JSON.stringify(RVF_EXAMPLES, null, 2));
      return;
    }
    // The count is derived from the catalog so the banner cannot drift out of
    // sync when entries are added or removed.
    console.log(chalk.bold.cyan(`\nAvailable RVF Example Files (${RVF_EXAMPLES.length} total)\n`));
    console.log(chalk.dim(`Download: npx ruvector rvf download <name>\n`));
    // Column widths: names are left-aligned, sizes right-aligned.
    const maxName = Math.max(...RVF_EXAMPLES.map(e => e.name.length));
    const maxSize = Math.max(...RVF_EXAMPLES.map(e => e.size.length));
    for (const ex of RVF_EXAMPLES) {
      const name = chalk.green(ex.name.padEnd(maxName));
      const size = chalk.yellow(ex.size.padStart(maxSize));
      console.log(` ${name} ${size} ${chalk.dim(ex.desc)}`);
    }
    console.log(chalk.dim(`\nFull catalog: https://github.com/ruvnet/ruvector/tree/main/examples/rvf/output\n`));
  });
|
||||
|
||||
// `rvf download [names...]`: fetch one or more example .rvf files from GitHub
// into the output directory. Names are validated against the catalog (unless
// --all is used), reduced to a safe basename, and confined to the output dir.
rvfCmd.command('download [names...]')
  .description('Download example .rvf files from GitHub')
  .option('-a, --all', 'Download all 45 examples (~11 MB)')
  .option('-o, --output <dir>', 'Output directory', '.')
  .action(async (names, opts) => {
    const https = require('https');
    const ALLOWED_REDIRECT_HOSTS = ['raw.githubusercontent.com', 'objects.githubusercontent.com', 'github.com'];
    const MAX_REDIRECTS = 3;

    // Reduce a user-supplied name to a bare filename and reject anything that
    // is not alphanumeric / underscore / hyphen / dot.
    const sanitizeFileName = (name) => {
      const base = path.basename(name);
      if (!/^[\w\-.]+$/.test(base)) throw new Error(`Invalid filename: ${base}`);
      return base;
    };

    // Download `url` to `dest`, following a bounded number of 301/302
    // redirects to allow-listed https hosts. The destination file is only
    // created once a 200 response has arrived, and is removed again if the
    // transfer fails, so a failed download never leaves a bogus .rvf behind.
    const downloadFile = (url, dest, redirectsLeft = MAX_REDIRECTS) => new Promise((resolve, reject) => {
      https.get(url, (res) => {
        const status = res.statusCode;

        if (status === 301 || status === 302) {
          res.resume(); // discard the redirect body so the socket is released
          if (redirectsLeft <= 0) { reject(new Error('Too many redirects')); return; }
          const location = res.headers.location;
          let target;
          try {
            if (!location) throw new Error('missing Location header');
            target = new URL(location, url);
          } catch { reject(new Error('Invalid redirect URL')); return; }
          if (target.protocol !== 'https:' || !ALLOWED_REDIRECT_HOSTS.includes(target.hostname)) {
            reject(new Error(`Redirect to untrusted host: ${target.hostname}`));
            return;
          }
          // Re-enter so the redirected response gets the same status checks.
          downloadFile(target.href, dest, redirectsLeft - 1).then(resolve, reject);
          return;
        }

        if (status !== 200) {
          res.resume();
          reject(new Error(`HTTP ${status}`));
          return;
        }

        const file = fs.createWriteStream(dest);
        // On any stream failure: stop writing, remove the partial file
        // (best-effort), then report the original error.
        const abort = (err) => {
          file.destroy();
          fs.unlink(dest, () => reject(err));
        };
        res.on('error', abort);
        file.on('error', abort);
        file.on('finish', () => file.close(() => resolve()));
        res.pipe(file);
      }).on('error', reject);
    });

    let toDownload = [];
    if (opts.all) {
      toDownload = RVF_EXAMPLES.map(e => e.name);
    } else if (names && names.length > 0) {
      toDownload = names;
    } else {
      console.error(chalk.red('Specify example names or use --all. Run `npx ruvector rvf examples` to list.'));
      process.exit(1);
    }

    const outDir = path.resolve(opts.output);
    if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true });

    console.log(chalk.bold.cyan(`\nDownloading ${toDownload.length} .rvf file(s) to ${outDir}\n`));
    let ok = 0, fail = 0;
    for (const name of toDownload) {
      const rawName = name.endsWith('.rvf') ? name : `${name}.rvf`;
      let fileName;
      try { fileName = sanitizeFileName(rawName); } catch (e) {
        console.log(chalk.red(`SKIPPED: ${e.message}`));
        fail++;
        continue;
      }
      // Validate against known examples when not using --all
      if (!opts.all) {
        const baseName = fileName.replace(/\.rvf$/, '');
        if (!RVF_EXAMPLES.some(e => e.name === baseName)) {
          console.log(chalk.red(`SKIPPED: Unknown example '${baseName}'. Run 'npx ruvector rvf examples' to list.`));
          fail++;
          continue;
        }
      }
      const url = `${RVF_BASE_URL}/${encodeURIComponent(fileName)}`;
      const dest = path.join(outDir, fileName);
      // Path containment check: the destination must stay inside outDir.
      if (!path.resolve(dest).startsWith(path.resolve(outDir) + path.sep) && path.resolve(dest) !== path.resolve(outDir)) {
        console.log(chalk.red(`SKIPPED: Path traversal detected for '${fileName}'`));
        fail++;
        continue;
      }
      try {
        process.stdout.write(chalk.dim(` ${fileName} ... `));
        await downloadFile(url, dest);
        const stat = fs.statSync(dest);
        console.log(chalk.green(`OK (${(stat.size / 1024).toFixed(0)} KB)`));
        ok++;
      } catch (e) {
        console.log(chalk.red(`FAILED: ${e.message}`));
        fail++;
      }
    }
    console.log(chalk.bold(`\nDone: ${ok} downloaded, ${fail} failed\n`));
  });
|
||||
|
||||
// MCP Server command
|
||||
const mcpCmd = program.command('mcp').description('MCP (Model Context Protocol) server for Claude Code integration');
|
||||
|
||||
|
|
@ -7142,7 +7303,7 @@ mcpCmd.command('info')
|
|||
console.log(chalk.white('The RuVector MCP server provides self-learning intelligence'));
|
||||
console.log(chalk.white('tools to Claude Code via the Model Context Protocol.\n'));
|
||||
|
||||
console.log(chalk.bold('Available Tools:'));
|
||||
console.log(chalk.bold('Hooks Tools:'));
|
||||
console.log(chalk.dim(' hooks_stats - Get intelligence statistics'));
|
||||
console.log(chalk.dim(' hooks_route - Route task to best agent'));
|
||||
console.log(chalk.dim(' hooks_remember - Store context in vector memory'));
|
||||
|
|
@ -7154,6 +7315,23 @@ mcpCmd.command('info')
|
|||
console.log(chalk.dim(' hooks_doctor - Diagnose setup issues'));
|
||||
console.log(chalk.dim(' hooks_export - Export intelligence data'));
|
||||
|
||||
console.log(chalk.bold('\nRVF Vector Store Tools:'));
|
||||
console.log(chalk.dim(' rvf_create - Create new .rvf vector store'));
|
||||
console.log(chalk.dim(' rvf_open - Open existing .rvf store'));
|
||||
console.log(chalk.dim(' rvf_ingest - Insert vectors into store'));
|
||||
console.log(chalk.dim(' rvf_query - Query nearest neighbors'));
|
||||
console.log(chalk.dim(' rvf_delete - Delete vectors by ID'));
|
||||
console.log(chalk.dim(' rvf_status - Get store status'));
|
||||
console.log(chalk.dim(' rvf_compact - Compact store'));
|
||||
console.log(chalk.dim(' rvf_derive - COW-branch to child store'));
|
||||
console.log(chalk.dim(' rvf_segments - List file segments'));
|
||||
console.log(chalk.dim(' rvf_examples - List example .rvf files'));
|
||||
|
||||
console.log(chalk.bold('\nrvlite Query Tools:'));
|
||||
console.log(chalk.dim(' rvlite_sql - Execute SQL query over rvlite vector DB'));
|
||||
console.log(chalk.dim(' rvlite_cypher - Execute Cypher graph query'));
|
||||
console.log(chalk.dim(' rvlite_sparql - Execute SPARQL RDF query'));
|
||||
|
||||
console.log(chalk.bold('\n📦 Resources:'));
|
||||
console.log(chalk.dim(' ruvector://intelligence/stats - Current statistics'));
|
||||
console.log(chalk.dim(' ruvector://intelligence/patterns - Learned patterns'));
|
||||
|
|
|
|||
|
|
@ -24,7 +24,46 @@ const {
|
|||
} = require('@modelcontextprotocol/sdk/types.js');
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
const { execSync } = require('child_process');
|
||||
const { execSync, execFileSync } = require('child_process');
|
||||
|
||||
// ── Security Helpers ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Validate a file path argument for RVF operations.
 *
 * Rejects non-string or empty input, any path containing `..` or a NUL byte,
 * and any path that resolves to, or inside, one of the blocked system
 * directories.
 *
 * @param {string} filePath - Path supplied by the tool caller.
 * @returns {string} The absolute path produced by `path.resolve(filePath)`.
 * @throws {Error} If the input is empty/non-string, looks like path traversal,
 *   or targets a blocked directory.
 */
function validateRvfPath(filePath) {
  if (typeof filePath !== 'string' || filePath.length === 0) {
    throw new Error('Path must be a non-empty string');
  }
  // Block obvious path traversal. This is deliberately conservative: any
  // ".." substring is refused, even inside a file name.
  if (filePath.includes('..') || filePath.includes('\0')) {
    throw new Error('Path traversal detected');
  }
  const resolved = path.resolve(filePath);
  // Block sensitive system paths
  const blocked = ['/etc', '/proc', '/sys', '/dev', '/boot', '/root', '/var/run'];
  for (const prefix of blocked) {
    // Compare whole path components: "/etc" must block "/etc" and
    // "/etc/passwd" but not an unrelated sibling such as "/etcetera".
    if (resolved === prefix || resolved.startsWith(prefix + '/')) {
      throw new Error(`Access to ${prefix} is not allowed`);
    }
  }
  return resolved;
}
|
||||
|
||||
/**
 * Sanitize a value before it is interpolated into a shell command string.
 *
 * Removes NUL bytes, shell metacharacters, double quotes, backslashes and
 * line breaks, then removes `..` sequences and truncates to 4096 characters.
 * Callers embed the result inside double quotes (and in one place unquoted),
 * so `"` and `\` must go or the value could close the quoting, and CR/LF must
 * go or an unquoted value could start a new command line.
 *
 * NOTE(review): stripping is a mitigation only; passing an argument array to
 * `execFileSync` avoids shell parsing altogether and is the more robust fix.
 *
 * @param {*} arg - Value to sanitize; anything that is not a string yields ''.
 * @returns {string} The sanitized string, at most 4096 characters long.
 */
function sanitizeShellArg(arg) {
  if (typeof arg !== 'string') return '';
  return arg
    .replace(/\0/g, '')
    // Metacharacters, plus the characters that can terminate or escape a
    // double-quoted shell word (", \) or split the command line (CR, LF).
    .replace(/[`$(){}|;&<>!"\\\r\n]/g, '')
    .replace(/\.\./g, '')
    .slice(0, 4096);
}
|
||||
|
||||
// Try to load the full IntelligenceEngine
|
||||
let IntelligenceEngine = null;
|
||||
|
|
@ -1045,6 +1084,161 @@ const TOOLS = [
|
|||
},
|
||||
required: []
|
||||
}
|
||||
},
|
||||
// ── RVF Vector Store Tools ────────────────────────────────────────────────
|
||||
{
|
||||
name: 'rvf_create',
|
||||
description: 'Create a new RVF vector store (.rvf file) with specified dimensions and distance metric',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
path: { type: 'string', description: 'File path for the new .rvf store' },
|
||||
dimension: { type: 'number', description: 'Vector dimensionality (e.g. 128, 384, 768, 1536)' },
|
||||
metric: { type: 'string', description: 'Distance metric: cosine, l2, or dotproduct', default: 'cosine' }
|
||||
},
|
||||
required: ['path', 'dimension']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_open',
|
||||
description: 'Open an existing RVF store for read-write operations',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
path: { type: 'string', description: 'Path to existing .rvf file' }
|
||||
},
|
||||
required: ['path']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_ingest',
|
||||
description: 'Insert vectors into an RVF store',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
path: { type: 'string', description: 'Path to .rvf store' },
|
||||
entries: { type: 'array', description: 'Array of {id, vector, metadata?} objects', items: { type: 'object' } }
|
||||
},
|
||||
required: ['path', 'entries']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_query',
|
||||
description: 'Query nearest neighbors in an RVF store',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
path: { type: 'string', description: 'Path to .rvf store' },
|
||||
vector: { type: 'array', description: 'Query vector as array of numbers', items: { type: 'number' } },
|
||||
k: { type: 'number', description: 'Number of results to return', default: 10 }
|
||||
},
|
||||
required: ['path', 'vector']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_delete',
|
||||
description: 'Delete vectors by ID from an RVF store',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
path: { type: 'string', description: 'Path to .rvf store' },
|
||||
ids: { type: 'array', description: 'Vector IDs to delete', items: { type: 'number' } }
|
||||
},
|
||||
required: ['path', 'ids']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_status',
|
||||
description: 'Get status of an RVF store (vector count, dimension, metric, file size)',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
path: { type: 'string', description: 'Path to .rvf store' }
|
||||
},
|
||||
required: ['path']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_compact',
|
||||
description: 'Compact an RVF store to reclaim space from deleted vectors',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
path: { type: 'string', description: 'Path to .rvf store' }
|
||||
},
|
||||
required: ['path']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_derive',
|
||||
description: 'Derive a child RVF store from a parent using copy-on-write branching',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
parent_path: { type: 'string', description: 'Path to parent .rvf store' },
|
||||
child_path: { type: 'string', description: 'Path for the new child .rvf store' }
|
||||
},
|
||||
required: ['parent_path', 'child_path']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_segments',
|
||||
description: 'List all segments in an RVF file (VEC, INDEX, KERNEL, EBPF, WITNESS, etc.)',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
path: { type: 'string', description: 'Path to .rvf store' }
|
||||
},
|
||||
required: ['path']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvf_examples',
|
||||
description: 'List available example .rvf files with download URLs from the ruvector repository',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
filter: { type: 'string', description: 'Filter examples by name or description substring' }
|
||||
},
|
||||
required: []
|
||||
}
|
||||
},
|
||||
// ── rvlite Query Tools ──────────────────────────────────────────────────
|
||||
{
|
||||
name: 'rvlite_sql',
|
||||
description: 'Execute SQL query over rvlite vector database with optional RVF backend',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
query: { type: 'string', description: 'SQL query string (supports distance() and vec_search() functions)' },
|
||||
db_path: { type: 'string', description: 'Path to database file (optional)' }
|
||||
},
|
||||
required: ['query']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvlite_cypher',
|
||||
description: 'Execute Cypher graph query over rvlite property graph',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
query: { type: 'string', description: 'Cypher query string' },
|
||||
db_path: { type: 'string', description: 'Path to database file (optional)' }
|
||||
},
|
||||
required: ['query']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'rvlite_sparql',
|
||||
description: 'Execute SPARQL query over rvlite RDF triple store',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
query: { type: 'string', description: 'SPARQL query string' },
|
||||
db_path: { type: 'string', description: 'Path to database file (optional)' }
|
||||
},
|
||||
required: ['query']
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
|
|
@ -1654,7 +1848,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_ast_analyze': {
|
||||
try {
|
||||
const output = execSync(`npx ruvector hooks ast-analyze "${args.file}" --json`, { encoding: 'utf-8', timeout: 30000 });
|
||||
const safeFile = sanitizeShellArg(args.file);
|
||||
const output = execSync(`npx ruvector hooks ast-analyze "${safeFile}" --json`, { encoding: 'utf-8', timeout: 30000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }] };
|
||||
|
|
@ -1663,8 +1858,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_ast_complexity': {
|
||||
try {
|
||||
const filesArg = args.files.map(f => `"${f}"`).join(' ');
|
||||
const output = execSync(`npx ruvector hooks ast-complexity ${filesArg} --threshold ${args.threshold || 10}`, { encoding: 'utf-8', timeout: 60000 });
|
||||
const filesArg = args.files.map(f => `"${sanitizeShellArg(f)}"`).join(' ');
|
||||
const threshold = parseInt(args.threshold, 10) || 10;
|
||||
const output = execSync(`npx ruvector hooks ast-complexity ${filesArg} --threshold ${threshold}`, { encoding: 'utf-8', timeout: 60000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }] };
|
||||
|
|
@ -1673,7 +1869,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_diff_analyze': {
|
||||
try {
|
||||
const cmd = args.commit ? `npx ruvector hooks diff-analyze "${args.commit}" --json` : 'npx ruvector hooks diff-analyze --json';
|
||||
const cmd = args.commit ? `npx ruvector hooks diff-analyze "${sanitizeShellArg(args.commit)}" --json` : 'npx ruvector hooks diff-analyze --json';
|
||||
const output = execSync(cmd, { encoding: 'utf-8', timeout: 60000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
|
|
@ -1683,7 +1879,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_diff_classify': {
|
||||
try {
|
||||
const cmd = args.commit ? `npx ruvector hooks diff-classify "${args.commit}"` : 'npx ruvector hooks diff-classify';
|
||||
const cmd = args.commit ? `npx ruvector hooks diff-classify "${sanitizeShellArg(args.commit)}"` : 'npx ruvector hooks diff-classify';
|
||||
const output = execSync(cmd, { encoding: 'utf-8', timeout: 30000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
|
|
@ -1693,7 +1889,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_diff_similar': {
|
||||
try {
|
||||
const output = execSync(`npx ruvector hooks diff-similar -k ${args.top_k || 5} --commits ${args.commits || 50}`, { encoding: 'utf-8', timeout: 120000 });
|
||||
const topK = parseInt(args.top_k, 10) || 5;
|
||||
const commits = parseInt(args.commits, 10) || 50;
|
||||
const output = execSync(`npx ruvector hooks diff-similar -k ${topK} --commits ${commits}`, { encoding: 'utf-8', timeout: 120000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }] };
|
||||
|
|
@ -1702,7 +1900,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_coverage_route': {
|
||||
try {
|
||||
const output = execSync(`npx ruvector hooks coverage-route "${args.file}"`, { encoding: 'utf-8', timeout: 15000 });
|
||||
const safeFile = sanitizeShellArg(args.file);
|
||||
const output = execSync(`npx ruvector hooks coverage-route "${safeFile}"`, { encoding: 'utf-8', timeout: 15000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }] };
|
||||
|
|
@ -1711,7 +1910,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_coverage_suggest': {
|
||||
try {
|
||||
const filesArg = args.files.map(f => `"${f}"`).join(' ');
|
||||
const filesArg = args.files.map(f => `"${sanitizeShellArg(f)}"`).join(' ');
|
||||
const output = execSync(`npx ruvector hooks coverage-suggest ${filesArg}`, { encoding: 'utf-8', timeout: 30000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
|
|
@ -1721,7 +1920,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_graph_mincut': {
|
||||
try {
|
||||
const filesArg = args.files.map(f => `"${f}"`).join(' ');
|
||||
const filesArg = args.files.map(f => `"${sanitizeShellArg(f)}"`).join(' ');
|
||||
const output = execSync(`npx ruvector hooks graph-mincut ${filesArg}`, { encoding: 'utf-8', timeout: 60000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
|
|
@ -1731,9 +1930,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_graph_cluster': {
|
||||
try {
|
||||
const filesArg = args.files.map(f => `"${f}"`).join(' ');
|
||||
const method = args.method || 'louvain';
|
||||
const clusters = args.clusters || 3;
|
||||
const filesArg = args.files.map(f => `"${sanitizeShellArg(f)}"`).join(' ');
|
||||
const method = sanitizeShellArg(args.method || 'louvain');
|
||||
const clusters = parseInt(args.clusters, 10) || 3;
|
||||
const output = execSync(`npx ruvector hooks graph-cluster ${filesArg} --method ${method} --clusters ${clusters}`, { encoding: 'utf-8', timeout: 60000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
|
|
@ -1743,7 +1942,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_security_scan': {
|
||||
try {
|
||||
const filesArg = args.files.map(f => `"${f}"`).join(' ');
|
||||
const filesArg = args.files.map(f => `"${sanitizeShellArg(f)}"`).join(' ');
|
||||
const output = execSync(`npx ruvector hooks security-scan ${filesArg}`, { encoding: 'utf-8', timeout: 120000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
|
|
@ -1753,7 +1952,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_rag_context': {
|
||||
try {
|
||||
let cmd = `npx ruvector hooks rag-context "${args.query}" -k ${args.top_k || 5}`;
|
||||
const safeQuery = sanitizeShellArg(args.query);
|
||||
const topK = parseInt(args.top_k, 10) || 5;
|
||||
let cmd = `npx ruvector hooks rag-context "${safeQuery}" -k ${topK}`;
|
||||
if (args.rerank) cmd += ' --rerank';
|
||||
const output = execSync(cmd, { encoding: 'utf-8', timeout: 30000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
|
|
@ -1764,7 +1965,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_git_churn': {
|
||||
try {
|
||||
const output = execSync(`npx ruvector hooks git-churn --days ${args.days || 30} --top ${args.top || 10}`, { encoding: 'utf-8', timeout: 30000 });
|
||||
const days = parseInt(args.days, 10) || 30;
|
||||
const top = parseInt(args.top, 10) || 10;
|
||||
const output = execSync(`npx ruvector hooks git-churn --days ${days} --top ${top}`, { encoding: 'utf-8', timeout: 30000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }] };
|
||||
|
|
@ -1773,8 +1976,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
|
||||
case 'hooks_route_enhanced': {
|
||||
try {
|
||||
let cmd = `npx ruvector hooks route-enhanced "${args.task}"`;
|
||||
if (args.file) cmd += ` --file "${args.file}"`;
|
||||
const safeTask = sanitizeShellArg(args.task);
|
||||
let cmd = `npx ruvector hooks route-enhanced "${safeTask}"`;
|
||||
if (args.file) cmd += ` --file "${sanitizeShellArg(args.file)}"`;
|
||||
const output = execSync(cmd, { encoding: 'utf-8', timeout: 30000 });
|
||||
return { content: [{ type: 'text', text: output }] };
|
||||
} catch (e) {
|
||||
|
|
@ -2199,7 +2403,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
// BACKGROUND WORKERS HANDLERS (via agentic-flow)
|
||||
// ============================================
|
||||
case 'workers_dispatch': {
|
||||
const prompt = args.prompt;
|
||||
const prompt = sanitizeShellArg(args.prompt);
|
||||
try {
|
||||
const result = execSync(`npx agentic-flow@alpha workers dispatch "${prompt.replace(/"/g, '\\"')}"`, {
|
||||
encoding: 'utf-8',
|
||||
|
|
@ -2380,8 +2584,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
}
|
||||
|
||||
case 'workers_run': {
|
||||
const name = args.name;
|
||||
const targetPath = args.path || '.';
|
||||
const name = sanitizeShellArg(args.name);
|
||||
const targetPath = sanitizeShellArg(args.path || '.');
|
||||
try {
|
||||
const result = execSync(`npx agentic-flow@alpha workers run "${name}" --path "${targetPath}"`, {
|
||||
encoding: 'utf-8',
|
||||
|
|
@ -2447,7 +2651,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
}
|
||||
|
||||
case 'workers_load_config': {
|
||||
const configFile = args.file || 'workers.yaml';
|
||||
const configFile = sanitizeShellArg(args.file || 'workers.yaml');
|
||||
try {
|
||||
const result = execSync(`npx agentic-flow@alpha workers load-config --file "${configFile}"`, {
|
||||
encoding: 'utf-8',
|
||||
|
|
@ -2468,6 +2672,244 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||
}
|
||||
}
|
||||
|
||||
// ── RVF Tool Handlers ─────────────────────────────────────────────────
|
||||
case 'rvf_create': {
|
||||
try {
|
||||
const safePath = validateRvfPath(args.path);
|
||||
const { createRvfStore } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await createRvfStore(safePath, { dimension: args.dimension, metric: args.metric || 'cosine' });
|
||||
const status = store.status ? await store.status() : { dimension: args.dimension };
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, path: safePath, ...status }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message, hint: 'Install @ruvector/rvf: npm install @ruvector/rvf' }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_open': {
|
||||
try {
|
||||
const safePath = validateRvfPath(args.path);
|
||||
const { openRvfStore, rvfStatus } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await openRvfStore(safePath);
|
||||
const status = await rvfStatus(store);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, path: safePath, ...status }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_ingest': {
|
||||
try {
|
||||
const safePath = validateRvfPath(args.path);
|
||||
const { openRvfStore, rvfIngest, rvfClose } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await openRvfStore(safePath);
|
||||
const result = await rvfIngest(store, args.entries);
|
||||
await rvfClose(store);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, ...result }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_query': {
|
||||
try {
|
||||
const safePath = validateRvfPath(args.path);
|
||||
const { openRvfStore, rvfQuery, rvfClose } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await openRvfStore(safePath);
|
||||
const results = await rvfQuery(store, args.vector, args.k || 10);
|
||||
await rvfClose(store);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, results }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_delete': {
|
||||
try {
|
||||
const safePath = validateRvfPath(args.path);
|
||||
const { openRvfStore, rvfDelete, rvfClose } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await openRvfStore(safePath);
|
||||
const result = await rvfDelete(store, args.ids);
|
||||
await rvfClose(store);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, ...result }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_status': {
|
||||
try {
|
||||
const safePath = validateRvfPath(args.path);
|
||||
const { openRvfStore, rvfStatus, rvfClose } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await openRvfStore(safePath);
|
||||
const status = await rvfStatus(store);
|
||||
await rvfClose(store);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, ...status }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_compact': {
|
||||
try {
|
||||
const safePath = validateRvfPath(args.path);
|
||||
const { openRvfStore, rvfCompact, rvfClose } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await openRvfStore(safePath);
|
||||
const result = await rvfCompact(store);
|
||||
await rvfClose(store);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, ...result }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_derive': {
|
||||
try {
|
||||
const safeParent = validateRvfPath(args.parent_path);
|
||||
const safeChild = validateRvfPath(args.child_path);
|
||||
const { openRvfStore, rvfDerive, rvfClose } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await openRvfStore(safeParent);
|
||||
await rvfDerive(store, safeChild);
|
||||
await rvfClose(store);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, parent: safeParent, child: safeChild }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_segments': {
|
||||
try {
|
||||
const safePath = validateRvfPath(args.path);
|
||||
const { openRvfStore, rvfClose } = require('../dist/core/rvf-wrapper.js');
|
||||
const store = await openRvfStore(safePath);
|
||||
const segs = await store.segments();
|
||||
await rvfClose(store);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: true, segments: segs }, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({ success: false, error: e.message }, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvf_examples': {
|
||||
const BASE_URL = 'https://raw.githubusercontent.com/ruvnet/ruvector/main/examples/rvf/output';
|
||||
const examples = [
|
||||
{ name: 'basic_store', size: '152 KB', desc: '1,000 vectors, dim 128' },
|
||||
{ name: 'semantic_search', size: '755 KB', desc: 'Semantic search with HNSW' },
|
||||
{ name: 'rag_pipeline', size: '303 KB', desc: 'RAG pipeline embeddings' },
|
||||
{ name: 'agent_memory', size: '32 KB', desc: 'AI agent episodic memory' },
|
||||
{ name: 'swarm_knowledge', size: '86 KB', desc: 'Multi-agent knowledge base' },
|
||||
{ name: 'self_booting', size: '31 KB', desc: 'Self-booting with kernel' },
|
||||
{ name: 'ebpf_accelerator', size: '153 KB', desc: 'eBPF distance accelerator' },
|
||||
{ name: 'tee_attestation', size: '102 KB', desc: 'TEE attestation + witnesses' },
|
||||
{ name: 'lineage_parent', size: '52 KB', desc: 'COW parent file' },
|
||||
{ name: 'lineage_child', size: '26 KB', desc: 'COW child (derived)' },
|
||||
{ name: 'claude_code_appliance', size: '17 KB', desc: 'Claude Code appliance' },
|
||||
{ name: 'progressive_index', size: '2.5 MB', desc: 'Large-scale HNSW index' },
|
||||
];
|
||||
let filtered = examples;
|
||||
if (args.filter) {
|
||||
const f = args.filter.toLowerCase();
|
||||
filtered = examples.filter(e => e.name.includes(f) || e.desc.toLowerCase().includes(f));
|
||||
}
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: true,
|
||||
total: 45,
|
||||
shown: filtered.length,
|
||||
examples: filtered.map(e => ({ ...e, url: `${BASE_URL}/${e.name}.rvf` })),
|
||||
catalog: 'https://github.com/ruvnet/ruvector/tree/main/examples/rvf/output'
|
||||
}, null, 2) }] };
|
||||
}
|
||||
|
||||
// ── rvlite Query Tool Handlers ──────────────────────────────────────
|
||||
case 'rvlite_sql': {
|
||||
try {
|
||||
let rvlite;
|
||||
try {
|
||||
rvlite = require('rvlite');
|
||||
} catch (_e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: false,
|
||||
error: 'rvlite package not installed',
|
||||
hint: 'Install with: npm install rvlite'
|
||||
}, null, 2) }] };
|
||||
}
|
||||
const safeQuery = sanitizeShellArg(args.query);
|
||||
const dbOpts = args.db_path ? { path: validateRvfPath(args.db_path) } : {};
|
||||
const db = new rvlite.Database(dbOpts);
|
||||
const results = db.sql(safeQuery);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: true,
|
||||
query_type: 'sql',
|
||||
results,
|
||||
row_count: Array.isArray(results) ? results.length : 0
|
||||
}, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: false,
|
||||
error: e.message
|
||||
}, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvlite_cypher': {
|
||||
try {
|
||||
let rvlite;
|
||||
try {
|
||||
rvlite = require('rvlite');
|
||||
} catch (_e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: false,
|
||||
error: 'rvlite package not installed',
|
||||
hint: 'Install with: npm install rvlite'
|
||||
}, null, 2) }] };
|
||||
}
|
||||
const safeQuery = sanitizeShellArg(args.query);
|
||||
const dbOpts = args.db_path ? { path: validateRvfPath(args.db_path) } : {};
|
||||
const db = new rvlite.Database(dbOpts);
|
||||
const results = db.cypher(safeQuery);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: true,
|
||||
query_type: 'cypher',
|
||||
results,
|
||||
row_count: Array.isArray(results) ? results.length : 0
|
||||
}, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: false,
|
||||
error: e.message
|
||||
}, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
case 'rvlite_sparql': {
|
||||
try {
|
||||
let rvlite;
|
||||
try {
|
||||
rvlite = require('rvlite');
|
||||
} catch (_e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: false,
|
||||
error: 'rvlite package not installed',
|
||||
hint: 'Install with: npm install rvlite'
|
||||
}, null, 2) }] };
|
||||
}
|
||||
const safeQuery = sanitizeShellArg(args.query);
|
||||
const dbOpts = args.db_path ? { path: validateRvfPath(args.db_path) } : {};
|
||||
const db = new rvlite.Database(dbOpts);
|
||||
const results = db.sparql(safeQuery);
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: true,
|
||||
query_type: 'sparql',
|
||||
results,
|
||||
row_count: Array.isArray(results) ? results.length : 0
|
||||
}, null, 2) }] };
|
||||
} catch (e) {
|
||||
return { content: [{ type: 'text', text: JSON.stringify({
|
||||
success: false,
|
||||
error: e.message
|
||||
}, null, 2) }], isError: true };
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
return {
|
||||
content: [{
|
||||
|
|
|
|||
|
|
@ -1,14 +1,38 @@
|
|||
# @ruvector/rvf
|
||||
|
||||
Unified TypeScript SDK for the RuVector Format (RVF) cognitive container. A single `.rvf` file stores vectors, carries models, boots services, and proves everything.
|
||||
Unified TypeScript/JavaScript SDK for the **RuVector Format (RVF)** — a cognitive container that stores vectors, carries models, boots compute kernels, and proves everything in a single `.rvf` file.
|
||||
|
||||
## Platform Support
|
||||
|
||||
| Platform | Runtime | Backend | Status |
|
||||
|----------|---------|---------|--------|
|
||||
| Linux x86_64 | Node.js 18+ | Native (N-API) | Stable |
|
||||
| Linux aarch64 | Node.js 18+ | Native (N-API) | Stable |
|
||||
| macOS x86_64 | Node.js 18+ | Native (N-API) | Stable |
|
||||
| macOS arm64 (Apple Silicon) | Node.js 18+ | Native (N-API) | Stable |
|
||||
| Windows x86_64 | Node.js 18+ | Native (N-API) | Stable |
|
||||
| Any | Deno | WASM | Supported |
|
||||
| Any | Browser (Chrome, Firefox, Safari) | WASM | Supported |
|
||||
| Any | Cloudflare Workers / Edge | WASM | Supported |
|
||||
| Any | Bun | Native (N-API) | Experimental |
|
||||
|
||||
**Deno**: The WASM build targets `wasm32-unknown-unknown`, which runs natively in Deno. Import via `npm:` specifier or load the `.wasm` bundle directly.
|
||||
|
||||
**Browser**: The `@ruvector/rvf-wasm` package provides a ~46 KB control-plane WASM module plus a ~5.5 KB tile-compute module. Works in any browser with WebAssembly support.
|
||||
|
||||
## Install
|
||||
|
||||
```bash
|
||||
# Node.js (auto-detects native or WASM)
|
||||
npm install @ruvector/rvf
|
||||
|
||||
# WASM only (browser, Deno, edge)
|
||||
npm install @ruvector/rvf-wasm
|
||||
```
|
||||
|
||||
## Usage
|
||||
## Quick Start
|
||||
|
||||
### Node.js
|
||||
|
||||
```typescript
|
||||
import { RvfDatabase } from '@ruvector/rvf';
|
||||
|
|
@ -27,32 +51,291 @@ console.log(db.fileId()); // unique file UUID
|
|||
console.log(db.dimension()); // 384
|
||||
console.log(db.segments()); // [{ type, id, size }]
|
||||
|
||||
// Derive child (COW branching)
|
||||
const child = db.derive('child.rvf');
|
||||
|
||||
db.close();
|
||||
```
|
||||
|
||||
### Browser (WASM)
|
||||
|
||||
```html
|
||||
<script type="module">
|
||||
import init, { RvfStore } from '@ruvector/rvf-wasm';
|
||||
|
||||
await init();
|
||||
|
||||
const store = RvfStore.create(384, 'cosine');
|
||||
store.ingest(new Float32Array(384), 0);
|
||||
const results = store.query(new Float32Array(384), 10);
|
||||
console.log('Results:', results);
|
||||
</script>
|
||||
```
|
||||
|
||||
### Deno
|
||||
|
||||
```typescript
|
||||
// Import via npm: specifier
|
||||
import init, { RvfStore } from "npm:@ruvector/rvf-wasm";
|
||||
|
||||
await init();
|
||||
|
||||
const store = RvfStore.create(384, 'cosine');
|
||||
store.ingest(new Float32Array(384), 0);
|
||||
const results = store.query(new Float32Array(384), 10);
|
||||
console.log('Results:', results);
|
||||
```
|
||||
|
||||
## What is RVF?
|
||||
|
||||
RVF (RuVector Format) is a universal binary substrate that merges database, model, graph engine, kernel, and attestation into a single deployable file.
|
||||
RVF (RuVector Format) is a universal binary substrate that merges database, model, graph engine, kernel, and attestation into a single deployable file. A `.rvf` file is segmented — each segment carries a different payload type, and unknown segments are preserved by all tools.
|
||||
|
||||
| Capability | Segment |
|
||||
|------------|---------|
|
||||
| Vector storage | VEC_SEG + INDEX_SEG |
|
||||
| LoRA adapters | OVERLAY_SEG |
|
||||
| Graph state | GRAPH_SEG |
|
||||
| Self-boot Linux | KERNEL_SEG |
|
||||
| eBPF acceleration | EBPF_SEG |
|
||||
| Browser queries | WASM_SEG |
|
||||
| Witness chains | WITNESS_SEG + CRYPTO_SEG |
|
||||
| COW branching | COW_MAP + MEMBERSHIP |
|
||||
### Segment Types
|
||||
|
||||
| ID | Segment | Description |
|
||||
|----|---------|-------------|
|
||||
| 0x00 | MANIFEST_SEG | Level0Root manifest with file metadata |
|
||||
| 0x01 | VEC_SEG | Raw vector data (f32, f16, bf16, int8) |
|
||||
| 0x02 | INDEX_SEG | HNSW graph for approximate nearest neighbor |
|
||||
| 0x03 | META_SEG | Vector metadata (JSON, CBOR) |
|
||||
| 0x04 | QUANT_SEG | Quantization codebooks |
|
||||
| 0x05 | OVERLAY_SEG | LoRA/adapter weight overlays |
|
||||
| 0x06 | GRAPH_SEG | Property graph adjacency data |
|
||||
| 0x07 | TENSOR_SEG | Dense tensor data |
|
||||
| 0x08 | WASM_SEG | Embedded WASM modules |
|
||||
| 0x09 | MODEL_SEG | ML model weights |
|
||||
| 0x0A | CRYPTO_SEG | Signatures and key material |
|
||||
| 0x0B | WITNESS_SEG | Append-only witness/audit chain |
|
||||
| 0x0C | CONFIG_SEG | Runtime configuration |
|
||||
| 0x0D | CUSTOM_SEG | User-defined segment |
|
||||
| 0x0E | KERNEL_SEG | Linux microkernel image |
|
||||
| 0x0F | EBPF_SEG | eBPF programs |
|
||||
| 0x20 | COW_MAP_SEG | Copy-on-write cluster map |
|
||||
| 0x21 | REFCOUNT_SEG | Cluster reference counts |
|
||||
| 0x22 | MEMBERSHIP_SEG | Branch membership filter |
|
||||
| 0x23 | DELTA_SEG | Sparse delta patches (LoRA) |
|
||||
|
||||
## N-API Methods (Node.js)
|
||||
|
||||
19 methods on the `RvfDatabase` class:
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| `RvfDatabase.create(path, opts)` | Create new RVF file |
|
||||
| `RvfDatabase.open(path)` | Open existing (read-write) |
|
||||
| `RvfDatabase.openReadonly(path)` | Open existing (read-only) |
|
||||
| `db.ingestBatch(vectors, ids)` | Insert vectors by batch |
|
||||
| `db.query(vector, k)` | k-NN search |
|
||||
| `db.delete(ids)` | Delete vectors by ID |
|
||||
| `db.deleteByFilter(filter)` | Delete vectors matching filter |
|
||||
| `db.compact()` | Compact and reclaim space |
|
||||
| `db.status()` | File status (count, dimension, metric) |
|
||||
| `db.close()` | Close file handle |
|
||||
| `db.fileId()` | UUID of this file |
|
||||
| `db.parentId()` | UUID of parent (if derived) |
|
||||
| `db.lineageDepth()` | Derivation depth |
|
||||
| `db.derive(path)` | COW-branch to new file |
|
||||
| `db.embedKernel(bytes)` | Embed Linux kernel image |
|
||||
| `db.extractKernel()` | Extract kernel image |
|
||||
| `db.embedEbpf(bytes)` | Embed eBPF program |
|
||||
| `db.extractEbpf()` | Extract eBPF program |
|
||||
| `db.segments()` | List all segments |
|
||||
|
||||
## WASM Exports
|
||||
|
||||
29 exported functions for browser and edge runtimes:
|
||||
|
||||
**Control plane** (10): `rvf_create`, `rvf_open`, `rvf_close`, `rvf_ingest`, `rvf_query`, `rvf_delete`, `rvf_status`, `rvf_compact`, `rvf_derive`, `rvf_segments`
|
||||
|
||||
**Tile compute** (14): `tile_dot_f32`, `tile_cosine_f32`, `tile_l2_f32`, `tile_dot_f16`, `tile_cosine_f16`, `tile_l2_f16`, `tile_topk`, `tile_quantize_sq8`, `tile_dequantize_sq8`, `tile_scan_filtered`, `tile_merge_topk`, `tile_batch_distance`, `tile_prefetch`, `tile_accumulate`
|
||||
|
||||
**Segment parsing** (3): `parse_segment_header`, `parse_vec_header`, `parse_manifest`
|
||||
|
||||
**Memory** (2): `rvf_alloc`, `rvf_free`
|
||||
|
||||
## CLI (Rust)
|
||||
|
||||
18 subcommands available through the `rvf` binary:
|
||||
|
||||
```bash
|
||||
# Core operations
|
||||
rvf create vectors.rvf --dimension 384 --metric cosine
|
||||
rvf ingest vectors.rvf --input data.json
|
||||
rvf query vectors.rvf --vector "[0.1,0.2,...]" --k 10
|
||||
rvf delete vectors.rvf --ids "[1,2,3]"
|
||||
rvf status vectors.rvf
|
||||
rvf inspect vectors.rvf
|
||||
rvf compact vectors.rvf
|
||||
|
||||
# Branching & lineage
|
||||
rvf derive vectors.rvf --output child.rvf
|
||||
rvf filter vectors.rvf --include "[1,2,3]"
|
||||
rvf freeze vectors.rvf
|
||||
rvf rebuild-refcounts vectors.rvf
|
||||
|
||||
# Compute containers
|
||||
rvf serve vectors.rvf --port 8080
|
||||
rvf launch vectors.rvf
|
||||
rvf embed-kernel vectors.rvf --image bzImage
|
||||
rvf embed-ebpf vectors.rvf --program filter.o
|
||||
|
||||
# Verification
|
||||
rvf verify-witness vectors.rvf
|
||||
rvf verify-attestation vectors.rvf
|
||||
|
||||
# Export
|
||||
rvf export vectors.rvf --output dump.json
|
||||
```
|
||||
|
||||
Build the CLI:
|
||||
|
||||
```bash
|
||||
cargo install --path crates/rvf/rvf-cli
|
||||
```
|
||||
|
||||
## Example .rvf Files
|
||||
|
||||
45 pre-built example files are available for download (~11 MB total). These demonstrate every segment type and use case.
|
||||
|
||||
### Download
|
||||
|
||||
```bash
|
||||
# Download a specific example
|
||||
curl -LO https://raw.githubusercontent.com/ruvnet/ruvector/main/examples/rvf/output/basic_store.rvf
|
||||
|
||||
# Clone just the examples
|
||||
git clone --depth 1 --filter=blob:none --sparse https://github.com/ruvnet/ruvector.git
|
||||
cd ruvector && git sparse-checkout set examples/rvf/output
|
||||
```
|
||||
|
||||
### Example Catalog
|
||||
|
||||
| File | Size | Description |
|
||||
|------|------|-------------|
|
||||
| `basic_store.rvf` | 152 KB | 1,000 vectors, dim 128, cosine metric |
|
||||
| `semantic_search.rvf` | 755 KB | Semantic search with HNSW index |
|
||||
| `rag_pipeline.rvf` | 303 KB | RAG pipeline with embeddings |
|
||||
| `embedding_cache.rvf` | 755 KB | Cached embedding store |
|
||||
| `quantization.rvf` | 1.5 MB | PQ-compressed vectors |
|
||||
| `progressive_index.rvf` | 2.5 MB | Large-scale progressive HNSW index |
|
||||
| `filtered_search.rvf` | 255 KB | Metadata-filtered vector search |
|
||||
| `recommendation.rvf` | 102 KB | Recommendation engine vectors |
|
||||
| `agent_memory.rvf` | 32 KB | AI agent episodic memory |
|
||||
| `swarm_knowledge.rvf` | 86 KB | Multi-agent shared knowledge base |
|
||||
| `experience_replay.rvf` | 27 KB | RL experience replay buffer |
|
||||
| `tool_cache.rvf` | 26 KB | MCP tool call cache |
|
||||
| `mcp_in_rvf.rvf` | 32 KB | MCP server embedded in RVF |
|
||||
| `ruvbot.rvf` | 51 KB | Chatbot knowledge store |
|
||||
| `claude_code_appliance.rvf` | 17 KB | Claude Code cognitive appliance |
|
||||
| `lineage_parent.rvf` | 52 KB | COW parent file |
|
||||
| `lineage_child.rvf` | 26 KB | COW child (derived) file |
|
||||
| `reasoning_parent.rvf` | 5.6 KB | Reasoning chain parent |
|
||||
| `reasoning_child.rvf` | 8.1 KB | Reasoning chain child |
|
||||
| `reasoning_grandchild.rvf` | 162 B | Minimal derived file |
|
||||
| `self_booting.rvf` | 31 KB | Self-booting with KERNEL_SEG |
|
||||
| `linux_microkernel.rvf` | 15 KB | Embedded Linux microkernel |
|
||||
| `ebpf_accelerator.rvf` | 153 KB | eBPF distance accelerator |
|
||||
| `browser_wasm.rvf` | 14 KB | Browser WASM module embedded |
|
||||
| `tee_attestation.rvf` | 102 KB | TEE attestation with witnesses |
|
||||
| `zero_knowledge.rvf` | 52 KB | ZK-proof witness chain |
|
||||
| `crypto_signed.rvf` | (see `sealed_engine.rvf`) | Signed + sealed |
|
||||
| `sealed_engine.rvf` | 208 KB | Sealed inference engine |
|
||||
| `access_control.rvf` | 77 KB | Permission-gated vectors |
|
||||
| `financial_signals.rvf` | 202 KB | Financial signal vectors |
|
||||
| `medical_imaging.rvf` | 302 KB | Medical imaging embeddings |
|
||||
| `legal_discovery.rvf` | 903 KB | Legal document discovery |
|
||||
| `multimodal_fusion.rvf` | 804 KB | Multi-modal embedding fusion |
|
||||
| `hyperbolic_taxonomy.rvf` | 23 KB | Hyperbolic space taxonomy |
|
||||
| `network_telemetry.rvf` | 16 KB | Network telemetry vectors |
|
||||
| `postgres_bridge.rvf` | 152 KB | PostgreSQL bridge vectors |
|
||||
| `ruvllm_inference.rvf` | 133 KB | RuvLLM inference cache |
|
||||
| `serverless.rvf` | 509 KB | Serverless deployment bundle |
|
||||
| `edge_iot.rvf` | 27 KB | Edge/IoT lightweight store |
|
||||
| `dedup_detector.rvf` | 153 KB | Deduplication detector |
|
||||
| `compacted.rvf` | 77 KB | Post-compaction example |
|
||||
| `posix_fileops.rvf` | 52 KB | POSIX file operations test |
|
||||
| `network_sync_a.rvf` | 52 KB | Network sync peer A |
|
||||
| `network_sync_b.rvf` | 52 KB | Network sync peer B |
|
||||
| `agent_handoff_a.rvf` | 31 KB | Agent handoff source |
|
||||
| `agent_handoff_b.rvf` | 11 KB | Agent handoff target |
|
||||
|
||||
### Generate Examples Locally
|
||||
|
||||
```bash
|
||||
cd crates/rvf
|
||||
cargo run --example generate_all
|
||||
ls output/ # 45 .rvf files
|
||||
```
|
||||
|
||||
## Integration
|
||||
|
||||
### With `ruvector` (npx ruvector)
|
||||
|
||||
The `ruvector` npm package includes 8 RVF CLI commands:
|
||||
|
||||
```bash
|
||||
npm install ruvector @ruvector/rvf
|
||||
|
||||
# Enable RVF backend
|
||||
export RUVECTOR_BACKEND=rvf
|
||||
|
||||
# Or use --backend flag
|
||||
npx ruvector --backend rvf create mydb.rvf -d 384
|
||||
|
||||
# RVF-specific commands
|
||||
npx ruvector rvf create mydb.rvf -d 384
|
||||
npx ruvector rvf ingest mydb.rvf --input data.json
|
||||
npx ruvector rvf query mydb.rvf --vector "[0.1,...]" --k 10
|
||||
npx ruvector rvf status mydb.rvf
|
||||
npx ruvector rvf segments mydb.rvf
|
||||
npx ruvector rvf derive mydb.rvf --output child.rvf
|
||||
npx ruvector rvf compact mydb.rvf
|
||||
npx ruvector rvf export mydb.rvf --output dump.json
|
||||
```
|
||||
|
||||
### With `rvlite`
|
||||
|
||||
```bash
|
||||
npm install rvlite @ruvector/rvf-wasm
|
||||
```
|
||||
|
||||
When `@ruvector/rvf-wasm` is installed, rvlite can use RVF as a persistent storage backend:
|
||||
|
||||
```typescript
|
||||
import { createRvLite } from 'rvlite';
|
||||
|
||||
// rvlite auto-detects @ruvector/rvf-wasm for persistence
|
||||
const db = await createRvLite({ dimensions: 384 });
|
||||
await db.insert([0.1, 0.2, ...], { text: "Hello world" });
|
||||
const results = await db.search([0.1, 0.2, ...], 5);
|
||||
```
|
||||
|
||||
## Packages
|
||||
|
||||
| Package | Description |
|
||||
|---------|-------------|
|
||||
| `@ruvector/rvf` | Unified SDK (this package) |
|
||||
| `@ruvector/rvf-node` | Native N-API bindings |
|
||||
| `@ruvector/rvf-wasm` | WASM build for browsers |
|
||||
| `@ruvector/rvf-mcp-server` | MCP server for AI agents |
|
||||
| Package | Description | Runtime |
|
||||
|---------|-------------|---------|
|
||||
| `@ruvector/rvf` | Unified SDK (this package) | Node.js |
|
||||
| `@ruvector/rvf-node` | Native N-API bindings | Node.js |
|
||||
| `@ruvector/rvf-wasm` | WASM build (~46 KB + ~5.5 KB tile) | Browser, Deno, Edge |
|
||||
| `@ruvector/rvf-mcp-server` | MCP server for AI agents | Node.js |
|
||||
|
||||
## Crate Structure (Rust)
|
||||
|
||||
| Crate | Description |
|
||||
|-------|-------------|
|
||||
| `rvf-types` | Wire types, segment headers, `no_std` compatible |
|
||||
| `rvf-wire` | Serialization/deserialization |
|
||||
| `rvf-manifest` | Level0Root manifest parsing |
|
||||
| `rvf-index` | HNSW index operations |
|
||||
| `rvf-quant` | Quantization codebooks |
|
||||
| `rvf-crypto` | Signing, verification, key management |
|
||||
| `rvf-runtime` | Full runtime (store, ingest, query, derive) |
|
||||
| `rvf-kernel` | Linux microkernel builder |
|
||||
| `rvf-launch` | QEMU launcher for self-booting files |
|
||||
| `rvf-ebpf` | eBPF compiler and loader |
|
||||
| `rvf-server` | HTTP API server (axum) |
|
||||
| `rvf-cli` | CLI binary |
|
||||
| `rvf-import` | Import from external formats |
|
||||
|
||||
## License
|
||||
|
||||
|
|
|
|||
|
|
@ -197,6 +197,68 @@ const similar = await memory.query("What was the weather question?", queryEmbedd
|
|||
const related = await memory.findRelated("conv-1", 2);
|
||||
```
|
||||
|
||||
## RVF Storage Backend
|
||||
|
||||
RvLite can use [RVF (RuVector Format)](https://github.com/ruvnet/ruvector/tree/main/crates/rvf) as a persistent storage backend. When the optional `@ruvector/rvf-wasm` package is installed, rvlite gains file-backed persistence using the `.rvf` cognitive container format.
|
||||
|
||||
### Install
|
||||
|
||||
```bash
|
||||
npm install rvlite @ruvector/rvf-wasm
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
```typescript
|
||||
import { createRvLite } from 'rvlite';
|
||||
|
||||
// rvlite auto-detects @ruvector/rvf-wasm when installed
|
||||
const db = await createRvLite({ dimensions: 384 });
|
||||
|
||||
// All operations persist to RVF format
|
||||
await db.insert([0.1, 0.2, ...], { text: "Hello world" });
|
||||
const results = await db.search([0.1, 0.2, ...], 5);
|
||||
```
|
||||
|
||||
### Platform Support
|
||||
|
||||
The RVF backend works everywhere rvlite runs:
|
||||
|
||||
| Platform | RVF Backend | Notes |
|
||||
|----------|-------------|-------|
|
||||
| Node.js (Linux, macOS, Windows) | Native or WASM | Auto-detected |
|
||||
| Browser (Chrome, Firefox, Safari) | WASM | IndexedDB + RVF |
|
||||
| Deno | WASM | Via `npm:` specifier |
|
||||
| Cloudflare Workers / Edge | WASM | Stateless queries |
|
||||
|
||||
### Rust Feature Flag
|
||||
|
||||
If building from source, enable the `rvf-backend` feature in `crates/rvlite`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
rvlite = { version = "0.1", features = ["rvf-backend"] }
|
||||
```
|
||||
|
||||
This enables epoch-based reconciliation between RVF and metadata stores:
|
||||
- Monotonic epoch counter shared between RVF and metadata
|
||||
- On startup, compares epochs and rebuilds the lagging side
|
||||
- RVF file is source of truth; metadata (IndexedDB) is rebuildable cache
|
||||
|
||||
### Download Example .rvf Files
|
||||
|
||||
```bash
|
||||
# Download pre-built examples to test with
|
||||
curl -LO https://raw.githubusercontent.com/ruvnet/ruvector/main/examples/rvf/output/basic_store.rvf
|
||||
curl -LO https://raw.githubusercontent.com/ruvnet/ruvector/main/examples/rvf/output/semantic_search.rvf
|
||||
curl -LO https://raw.githubusercontent.com/ruvnet/ruvector/main/examples/rvf/output/agent_memory.rvf
|
||||
|
||||
# 45 examples available at:
|
||||
# https://github.com/ruvnet/ruvector/tree/main/examples/rvf/output
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Integration with claude-flow
|
||||
|
||||
RvLite can enhance claude-flow's memory system with semantic search:
|
||||
|
|
|
|||
|
|
@ -71,11 +71,15 @@
|
|||
"@types/node": "^20.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@anthropic-ai/sdk": ">=0.20.0"
|
||||
"@anthropic-ai/sdk": ">=0.20.0",
|
||||
"@ruvector/rvf-wasm": ">=0.1.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@anthropic-ai/sdk": {
|
||||
"optional": true
|
||||
},
|
||||
"@ruvector/rvf-wasm": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
|
|
|
|||
362
npm/packages/rvlite/src/cli-rvf.ts
Normal file
362
npm/packages/rvlite/src/cli-rvf.ts
Normal file
|
|
@ -0,0 +1,362 @@
|
|||
/**
|
||||
* cli-rvf.ts - RVF migration and rebuild CLI commands
|
||||
*
|
||||
* Two commands:
|
||||
* rvf-migrate — Convert existing rvlite data to RVF format
|
||||
* rvf-rebuild — Reconstruct metadata from an RVF file
|
||||
*
|
||||
* Usage (via the rvlite CLI binary or directly):
|
||||
* rvlite rvf-migrate --source .rvlite/db.json --dest data.rvf [--dry-run] [--verify]
|
||||
* rvlite rvf-rebuild --source data.rvf [--dest .rvlite/db.json]
|
||||
*/
|
||||
|
||||
// ── Types ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * JSON layout of an rvlite database as persisted by the CLI.
 * Only `vectors` is required; every other section is optional.
 */
interface RvLiteDbState {
  /** Stored vectors, keyed by their string id. */
  vectors: {
    [id: string]: {
      vector: number[];
      metadata?: { [key: string]: unknown };
      norm?: number;
    };
  };
  /** Optional graph section holding node and edge maps. */
  graph?: {
    nodes?: { [key: string]: unknown };
    edges?: { [key: string]: unknown };
  };
  /** Optional list of subject/predicate/object triples. */
  triples?: { subject: string; predicate: string; object: string }[];
  nextId?: number;
  /** Optional settings; migration falls back to 384 / 'cosine' when absent. */
  config?: {
    dimensions?: number;
    metric?: string;
  };
}
|
||||
|
||||
/**
 * Top-level JSON wrapper written to an RVF file by the migrate command.
 * The database state is carried unchanged under `payload`.
 */
interface RvfFileEnvelope {
  /** Envelope format version (the migrate command writes 1). */
  rvf_version: number;
  /** Fixed marker used to recognise an existing RVF file. */
  magic: 'RVF1';
  /** Creation timestamp as an ISO-8601 string. */
  created_at: string;
  dimensions: number;
  distance_metric: string;
  /** The wrapped rvlite database state. */
  payload: RvLiteDbState;
}
|
||||
|
||||
/**
 * Outcome of an `rvfMigrate` run: per-section counts plus flags describing
 * how the run ended.
 */
export interface MigrateReport {
  /** Number of vectors counted for migration (0 when the run was skipped). */
  vectorsMigrated: number;
  /** Number of triples counted for migration (0 when the run was skipped). */
  triplesMigrated: number;
  /** Number of graph nodes counted for migration (0 when the run was skipped). */
  graphNodesMigrated: number;
  /** Number of graph edges counted for migration (0 when the run was skipped). */
  graphEdgesMigrated: number;
  /** True when an existing destination was treated as already migrated. */
  skipped: boolean;
  /** True when the run was a dry run. */
  dryRun: boolean;
  /** Result of the optional verification step; absent when it was not run. */
  verifyPassed?: boolean;
}
|
||||
|
||||
/** Per-section counts reported by the rebuild command. */
export interface RebuildReport {
  /** Vectors recovered. */
  vectorsRecovered: number;
  /** Triples recovered. */
  triplesRecovered: number;
  /** Graph nodes recovered. */
  graphNodesRecovered: number;
  /** Graph edges recovered. */
  graphEdgesRecovered: number;
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Element-wise approximate equality of two numeric vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @param tolerance - Largest absolute per-element difference still treated as equal.
 * @returns `true` when both vectors have the same length and every pair of
 *   elements differs by at most `tolerance`; `false` otherwise.
 */
function vectorsClose(a: number[], b: number[], tolerance: number): boolean {
  if (a.length !== b.length) return false;
  for (let i = 0; i < a.length; i++) {
    const x = a[i];
    const y = b[i];
    // A NaN on exactly one side makes the difference NaN, and `NaN > tolerance`
    // is false, so the mismatch would otherwise pass as "close".
    if (Number.isNaN(x) !== Number.isNaN(y)) return false;
    if (Math.abs(x - y) > tolerance) return false;
  }
  return true;
}
|
||||
|
||||
// ── Migrate ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Convert an existing rvlite JSON database into an RVF file.
 *
 * The source is parsed as JSON and wrapped, unchanged, as the `payload` of an
 * RVF envelope (`magic: "RVF1"`) that is written to `destPath` as
 * pretty-printed JSON. Missing parent directories of `destPath` are created.
 *
 * The call is a no-op (`skipped: true`) when `destPath` already holds an RVF
 * envelope whose payload serialises to exactly the same JSON as the source.
 * Comparing the whole payload, rather than only the number of vectors, means
 * edits that keep the vector count unchanged (modified vectors, triples or
 * graph data) are still migrated.
 *
 * @param sourcePath - Path to the rvlite JSON database (e.g., .rvlite/db.json).
 * @param destPath - Destination path for the RVF file.
 * @param options - Migration options. `dryRun` reports the counts without
 *   writing anything; `verify` re-reads the written file and checks every
 *   source vector against it with a 1e-6 per-component tolerance.
 * @returns A report summarising the migration. `verifyPassed` is only set
 *   when `options.verify` was requested and a file was actually written.
 * @throws Error when `sourcePath` does not exist. JSON parse errors from the
 *   source file propagate unchanged.
 */
export async function rvfMigrate(
  sourcePath: string,
  destPath: string,
  options: { dryRun?: boolean; verify?: boolean } = {}
): Promise<MigrateReport> {
  const fs = await import('fs');

  if (!fs.existsSync(sourcePath)) {
    throw new Error(`Source file not found: ${sourcePath}`);
  }

  const raw = fs.readFileSync(sourcePath, 'utf-8');
  const state: RvLiteDbState = JSON.parse(raw);

  // Idempotency: if dest already exists and is a valid RVF file whose
  // payload matches the source, treat as a no-op.
  if (fs.existsSync(destPath)) {
    try {
      const existing = JSON.parse(fs.readFileSync(destPath, 'utf-8')) as RvfFileEnvelope;
      // Both sides come straight from JSON.parse, so re-serialising them gives
      // a stable, order-preserving representation to compare.
      const payloadMatches =
        existing?.magic === 'RVF1' &&
        JSON.stringify(existing.payload) === JSON.stringify(state);
      if (payloadMatches) {
        return {
          vectorsMigrated: 0,
          triplesMigrated: 0,
          graphNodesMigrated: 0,
          graphEdgesMigrated: 0,
          skipped: true,
          dryRun: options.dryRun ?? false,
        };
      }
    } catch {
      // File exists but is not valid RVF — proceed with migration.
    }
  }

  const vectorCount = Object.keys(state.vectors ?? {}).length;
  const tripleCount = (state.triples ?? []).length;
  const nodeCount = Object.keys(state.graph?.nodes ?? {}).length;
  const edgeCount = Object.keys(state.graph?.edges ?? {}).length;

  if (options.dryRun) {
    return {
      vectorsMigrated: vectorCount,
      triplesMigrated: tripleCount,
      graphNodesMigrated: nodeCount,
      graphEdgesMigrated: edgeCount,
      skipped: false,
      dryRun: true,
    };
  }

  // Build the RVF envelope.
  const envelope: RvfFileEnvelope = {
    rvf_version: 1,
    magic: 'RVF1',
    created_at: new Date().toISOString(),
    dimensions: state.config?.dimensions ?? 384,
    distance_metric: state.config?.metric ?? 'cosine',
    payload: state,
  };

  const path = await import('path');
  const dir = path.dirname(destPath);
  if (dir && !fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }

  fs.writeFileSync(destPath, JSON.stringify(envelope, null, 2), 'utf-8');

  // Optionally verify round-trip fidelity.
  let verifyPassed: boolean | undefined;
  if (options.verify) {
    const reRead = JSON.parse(fs.readFileSync(destPath, 'utf-8')) as RvfFileEnvelope;
    verifyPassed = true;

    for (const [id, entry] of Object.entries(state.vectors ?? {})) {
      const rvfEntry = reRead.payload.vectors?.[id];
      // A vector that is missing or drifted beyond tolerance fails the check.
      if (!rvfEntry || !vectorsClose(entry.vector, rvfEntry.vector, 1e-6)) {
        verifyPassed = false;
        break;
      }
    }
  }

  return {
    vectorsMigrated: vectorCount,
    triplesMigrated: tripleCount,
    graphNodesMigrated: nodeCount,
    graphEdgesMigrated: edgeCount,
    skipped: false,
    dryRun: false,
    verifyPassed,
  };
}
|
||||
|
||||
// ── Rebuild ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Reconstruct metadata from an RVF file.
 *
 * Reads the JSON RVF envelope, walks every vector entry in its payload and
 * rebuilds graph / triple metadata from reserved metadata fields:
 *
 * - `_label` (string) becomes a graph node keyed by the vector id.
 * - `_from` and `_to` (strings) become a graph edge keyed by the vector id;
 *   `_type` supplies the edge type and defaults to `'RELATED'`.
 * - `_subject`, `_predicate` and `_object` (strings) become a triple.
 *
 * Recovered entries are merged with whatever graph data and triples the
 * payload already contains; recovered nodes and edges replace existing ones
 * with the same id, recovered triples are appended after existing ones.
 *
 * @param sourcePath - Path to the RVF file.
 * @param destPath - Optional destination for the rebuilt JSON state. Missing
 *   parent directories are created.
 * @returns A report summarising the recovered data.
 * @throws Error when the file does not exist, when the envelope is not an
 *   object carrying magic `"RVF1"`, or when it has no payload object. JSON
 *   parse errors propagate unchanged.
 */
export async function rvfRebuild(
  sourcePath: string,
  destPath?: string
): Promise<RebuildReport> {
  const fs = await import('fs');

  if (!fs.existsSync(sourcePath)) {
    throw new Error(`RVF file not found: ${sourcePath}`);
  }

  const raw = fs.readFileSync(sourcePath, 'utf-8');
  const envelope = JSON.parse(raw) as RvfFileEnvelope;

  // A file containing e.g. `null` parses fine but is not an envelope; report
  // it as an invalid RVF file instead of failing on the property access.
  if (!envelope || envelope.magic !== 'RVF1') {
    throw new Error(`Invalid RVF file: expected magic "RVF1", got "${envelope?.magic}"`);
  }

  const state = envelope.payload;
  if (!state || typeof state !== 'object') {
    throw new Error('Invalid RVF file: missing payload');
  }

  // Rebuild graph nodes from vectors that have graph-like metadata.
  const recoveredNodes: Record<string, unknown> = {};
  const recoveredEdges: Record<string, unknown> = {};
  const recoveredTriples: Array<{ subject: string; predicate: string; object: string }> = [];

  for (const [id, entry] of Object.entries(state.vectors ?? {})) {
    const meta = entry.metadata;
    if (!meta) continue;

    // Recover graph nodes: metadata with a `_label` field.
    if (typeof meta._label === 'string') {
      recoveredNodes[id] = { label: meta._label, properties: meta };
    }

    // Recover graph edges: metadata with `_from` and `_to`.
    if (typeof meta._from === 'string' && typeof meta._to === 'string') {
      recoveredEdges[id] = {
        from: meta._from,
        to: meta._to,
        type: meta._type ?? 'RELATED',
        properties: meta,
      };
    }

    // Recover triples: metadata with `_subject`, `_predicate`, `_object`.
    if (
      typeof meta._subject === 'string' &&
      typeof meta._predicate === 'string' &&
      typeof meta._object === 'string'
    ) {
      recoveredTriples.push({
        subject: meta._subject,
        predicate: meta._predicate,
        object: meta._object,
      });
    }
  }

  // Merge recovered data with any existing data in the envelope.
  const existingTriples = state.triples ?? [];
  const allTriples = [...existingTriples, ...recoveredTriples];

  const existingNodes = state.graph?.nodes ?? {};
  const existingEdges = state.graph?.edges ?? {};
  const allNodes = { ...existingNodes, ...recoveredNodes };
  const allEdges = { ...existingEdges, ...recoveredEdges };

  const rebuiltState: RvLiteDbState = {
    vectors: state.vectors ?? {},
    graph: { nodes: allNodes, edges: allEdges },
    triples: allTriples,
    // Without a stored counter, continue numbering after the current entries.
    nextId: state.nextId ?? Object.keys(state.vectors ?? {}).length + 1,
    config: {
      dimensions: envelope.dimensions,
      metric: envelope.distance_metric,
    },
  };

  if (destPath) {
    const path = await import('path');
    const dir = path.dirname(destPath);
    if (dir && !fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }
    fs.writeFileSync(destPath, JSON.stringify(rebuiltState, null, 2), 'utf-8');
  }

  return {
    vectorsRecovered: Object.keys(state.vectors ?? {}).length,
    triplesRecovered: allTriples.length,
    graphNodesRecovered: Object.keys(allNodes).length,
    graphEdgesRecovered: Object.keys(allEdges).length,
  };
}
|
||||
|
||||
// ── CLI Entry Point ──────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Attach the `rvf-migrate` and `rvf-rebuild` sub-commands to a Commander
 * program so the main rvlite CLI can expose them without duplicating code.
 *
 * Both commands print a short report to stdout. Any thrown error is printed
 * to stderr as `Error: <message>` and the process exits with code 1;
 * `rvf-migrate --verify` also exits with code 1 when verification fails.
 *
 * @param program - Commander program (or command) to register on.
 */
export function registerRvfCommands(program: any): void {
  // Shared failure path: print the message and terminate with exit code 1.
  const abortWith = (err: unknown): void => {
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`Error: ${msg}`);
    process.exit(1);
  };

  const runMigrate = async (
    options: { source: string; dest: string; dryRun: boolean; verify: boolean }
  ): Promise<void> => {
    try {
      const { source, dest, dryRun, verify } = options;
      const report = await rvfMigrate(source, dest, { dryRun, verify });

      if (report.skipped) {
        console.log('Migration skipped: destination already contains matching RVF data (idempotent).');
        return;
      }

      if (report.dryRun) {
        console.log('Dry run — no files written.');
      }

      const summary = [
        `Vectors migrated: ${report.vectorsMigrated}`,
        `Triples migrated: ${report.triplesMigrated}`,
        `Graph nodes migrated: ${report.graphNodesMigrated}`,
        `Graph edges migrated: ${report.graphEdgesMigrated}`,
      ];
      for (const line of summary) {
        console.log(line);
      }

      // `verifyPassed` is undefined unless verification was requested.
      if (report.verifyPassed !== undefined) {
        console.log(`Verification: ${report.verifyPassed ? 'PASSED' : 'FAILED'}`);
        if (!report.verifyPassed) {
          process.exit(1);
        }
      }
    } catch (err: unknown) {
      abortWith(err);
    }
  };

  const runRebuild = async (options: { source: string; dest?: string }): Promise<void> => {
    try {
      const report = await rvfRebuild(options.source, options.dest);

      const summary = [
        `Vectors recovered: ${report.vectorsRecovered}`,
        `Triples recovered: ${report.triplesRecovered}`,
        `Graph nodes recovered: ${report.graphNodesRecovered}`,
        `Graph edges recovered: ${report.graphEdgesRecovered}`,
      ];
      for (const line of summary) {
        console.log(line);
      }

      if (options.dest) {
        console.log(`Rebuilt state written to: ${options.dest}`);
      }
    } catch (err: unknown) {
      abortWith(err);
    }
  };

  program
    .command('rvf-migrate')
    .description('Convert existing rvlite data to RVF format')
    .requiredOption('-s, --source <path>', 'Path to source rvlite JSON database')
    .requiredOption('-d, --dest <path>', 'Destination RVF file path')
    .option('--dry-run', 'Report what would be migrated without writing', false)
    .option('--verify', 'Verify vectors match within 1e-6 tolerance after migration', false)
    .action(runMigrate);

  program
    .command('rvf-rebuild')
    .description('Reconstruct metadata from RVF file')
    .requiredOption('-s, --source <path>', 'Path to source RVF file')
    .option('-d, --dest <path>', 'Destination JSON file for rebuilt state')
    .action(runRebuild);
}
|
||||
|
|
@ -33,9 +33,40 @@
|
|||
// Re-export WASM module for advanced usage
|
||||
export * from '../dist/wasm/rvlite.js';
|
||||
|
||||
// ── RVF Backend Detection ─────────────────────────────────────────────────
|
||||
|
||||
/** Memoised probe result; `null` until `isRvfAvailable` has run once. */
let rvfWasmAvailable: boolean | null = null;

/**
 * Report whether the optional `@ruvector/rvf-wasm` package can be resolved.
 *
 * The lookup runs at most once; later calls return the cached answer. Any
 * error thrown by the lookup (including `require` itself not being defined)
 * is treated as "not available".
 */
export function isRvfAvailable(): boolean {
  if (rvfWasmAvailable === null) {
    let resolvable = true;
    try {
      require.resolve('@ruvector/rvf-wasm');
    } catch {
      resolvable = false;
    }
    rvfWasmAvailable = resolvable;
  }
  return rvfWasmAvailable;
}
|
||||
|
||||
/**
 * Pick the storage backend for the current environment.
 *
 * Preference order: `'rvf'` when `isRvfAvailable()` is true, otherwise
 * `'indexeddb'` when a global `indexedDB` exists, otherwise `'memory'`.
 */
export function getStorageBackend(): 'rvf' | 'indexeddb' | 'memory' {
  if (isRvfAvailable()) {
    return 'rvf';
  }
  return typeof indexedDB === 'undefined' ? 'memory' : 'indexeddb';
}
|
||||
|
||||
/** Options accepted when constructing an RvLite instance. */
export interface RvLiteConfig {
  /** Vector dimensionality. */
  dimensions?: number;
  /** Distance metric used for similarity search. */
  distanceMetric?: 'cosine' | 'euclidean' | 'dotproduct';
  /** Storage backend to force; omit or pass `'auto'` for auto-detection. */
  backend?: 'rvf' | 'indexeddb' | 'memory' | 'auto';
  /** Location of the RVF file used for persistent storage. */
  rvfPath?: string;
}
|
||||
|
||||
export interface SearchResult {
|
||||
|
|
@ -263,14 +294,164 @@ export class RvLite {
|
|||
const wasmModule = await import('../dist/wasm/rvlite.js');
|
||||
return wasmModule.RvLite.clear_storage();
|
||||
}
|
||||
|
||||
// ============ RVF Persistence ============
|
||||
|
||||
/**
 * Factory method: create an RvLite instance backed by an RVF file.
 *
 * Records `config.rvfPath` on the instance, tries to load the optional
 * `@ruvector/rvf-wasm` module (its absence is not an error), initialises the
 * instance and, when running under Node.js and the file already exists,
 * loads its content via `loadFromRvf`.
 *
 * Failures while loading an existing file are NOT swallowed: an unreadable
 * or invalid RVF file rejects the returned promise, so callers never end up
 * with a silently empty instance that could later overwrite the file.
 *
 * @param config - Standard RvLiteConfig plus a required `rvfPath`.
 * @returns A fully-initialised RvLite instance with data loaded from the
 *          RVF file (if it already exists).
 * @throws Whatever `init()` or `loadFromRvf()` throws.
 */
static async createWithRvf(
  config: RvLiteConfig & { rvfPath: string }
): Promise<RvLite> {
  const instance = new RvLite(config);
  instance.rvfPath = config.rvfPath;

  // Attempt to use @ruvector/rvf-wasm for native RVF I/O
  try {
    instance.rvfModule = await import('@ruvector/rvf-wasm' as string);
  } catch {
    // Optional dependency not available — fall back to JSON-based RVF.
  }

  await instance.init();

  // If the file exists on disk, load its content.
  if (typeof globalThis.process !== 'undefined') {
    // Only the availability of `fs` is best-effort; once we know the file is
    // there, load errors must reach the caller.
    let fs: any = null;
    try {
      fs = await import('fs' as string);
    } catch {
      // Browser or other environment — skip file check.
    }

    if (typeof fs?.existsSync === 'function' && fs.existsSync(config.rvfPath)) {
      await instance.loadFromRvf(config.rvfPath);
    }
  }

  return instance;
}
|
||||
|
||||
/**
 * Write the current database state to an RVF file.
 *
 * The state is obtained from `exportJson()`. If the optional RVF module was
 * loaded and exposes a `writeRvf` function, writing is delegated to it.
 * Otherwise the state is wrapped in a JSON envelope carrying RVF header
 * fields and written with Node's `fs`, creating the parent directory when
 * needed.
 *
 * @param filePath - Destination path for the RVF file.
 * @throws Error in the JSON fallback path when no global `process` exists.
 */
async saveToRvf(filePath: string): Promise<void> {
  await this.ensureInit();

  const exported = await this.exportJson();

  // Native writer takes precedence when present.
  const nativeWriter = this.rvfModule;
  if (nativeWriter && typeof nativeWriter.writeRvf === 'function') {
    await nativeWriter.writeRvf(filePath, exported);
    return;
  }

  // The JSON fallback needs a file system, i.e. Node.js.
  if (typeof globalThis.process === 'undefined') {
    throw new Error(
      'saveToRvf is only supported in Node.js environments. ' +
      'Use exportJson() for browser-side persistence.'
    );
  }

  const envelope: RvfFileEnvelope = {
    rvf_version: 1,
    magic: 'RVF1',
    created_at: new Date().toISOString(),
    dimensions: this.config.dimensions ?? 384,
    distance_metric: this.config.distanceMetric ?? 'cosine',
    payload: exported,
  };

  const fs = await import('fs' as string);
  const path = await import('path' as string);

  const parentDir = path.dirname(filePath);
  if (!fs.existsSync(parentDir)) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  const serialized = JSON.stringify(envelope, null, 2);
  fs.writeFileSync(filePath, serialized, 'utf-8');
}
|
||||
|
||||
/**
 * Load database state from an RVF file into this instance.
 *
 * If the optional RVF module was loaded and exposes a `readRvf` function,
 * its result is passed to `importJson()`. Otherwise the file is read with
 * Node's `fs`, parsed as a JSON envelope, checked for the `"RVF1"` magic and
 * its `payload` is passed to `importJson()`.
 *
 * @param filePath - Source path of the RVF file to import.
 * @throws Error in the JSON fallback path when no global `process` exists,
 *   when the file is missing, or when the magic is not `"RVF1"`.
 */
async loadFromRvf(filePath: string): Promise<void> {
  await this.ensureInit();

  // Native reader takes precedence when present.
  const nativeReader = this.rvfModule;
  if (nativeReader && typeof nativeReader.readRvf === 'function') {
    const decoded = await nativeReader.readRvf(filePath);
    await this.importJson(decoded);
    return;
  }

  // The JSON fallback needs a file system, i.e. Node.js.
  if (typeof globalThis.process === 'undefined') {
    throw new Error(
      'loadFromRvf is only supported in Node.js environments. ' +
      'Use importJson() for browser-side persistence.'
    );
  }

  const fs = await import('fs' as string);
  if (!fs.existsSync(filePath)) {
    throw new Error(`RVF file not found: ${filePath}`);
  }

  const text = fs.readFileSync(filePath, 'utf-8');
  const parsed = JSON.parse(text) as RvfFileEnvelope;

  if (parsed.magic !== 'RVF1') {
    throw new Error(
      `Invalid RVF file: expected magic "RVF1", got "${parsed.magic}"`
    );
  }

  await this.importJson(parsed.payload);
}
|
||||
|
||||
/**
 * @internal Optional `@ruvector/rvf-wasm` module; stays `null` when the
 * package could not be imported.
 */
private rvfModule: any = null;

/** @internal RVF backing file path; assigned by `createWithRvf`. */
private rvfPath: string | null = null;
|
||||
}
|
||||
|
||||
// ============ Convenience Functions ============
|
||||
|
||||
/**
|
||||
* Create a new RvLite instance (async factory)
|
||||
* Create a new RvLite instance (async factory).
|
||||
*
|
||||
* When `@ruvector/rvf-wasm` is installed, persistence uses RVF format.
|
||||
* Override with `config.backend` to force a specific backend.
|
||||
*/
|
||||
export async function createRvLite(config: RvLiteConfig = {}): Promise<RvLite> {
|
||||
const requestedBackend = config.backend || 'auto';
|
||||
const actualBackend = requestedBackend === 'auto' ? getStorageBackend() : requestedBackend;
|
||||
|
||||
// Log backend selection (useful for debugging)
|
||||
if (typeof process !== 'undefined' && process.env && process.env.RVLITE_DEBUG) {
|
||||
console.log(`[rvlite] storage backend: ${actualBackend} (requested: ${requestedBackend}, rvf available: ${isRvfAvailable()})`);
|
||||
}
|
||||
|
||||
const db = new RvLite(config);
|
||||
await db.init();
|
||||
return db;
|
||||
|
|
@ -295,6 +476,27 @@ export function createAnthropicEmbeddings(apiKey?: string): EmbeddingProvider {
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Prepare a string for embedding inside a quoted Cypher string literal.
 *
 * Backslashes, double quotes and single quotes are prefixed with a
 * backslash; ASCII control characters (0x00-0x1f and 0x7f, which includes
 * newlines and tabs) are removed.
 */
function sanitizeCypher(value: string): string {
  // One pass over every character that needs attention: escapable
  // characters get a leading backslash, control characters are dropped.
  return value.replace(/[\\"'\x00-\x1f\x7f]/g, (ch) =>
    ch === '\\' || ch === '"' || ch === "'" ? `\\${ch}` : ''
  );
}
|
||||
|
||||
/**
 * Check that a Cypher relationship type is a plain identifier.
 *
 * Accepts a letter or underscore followed by any number of letters, digits
 * or underscores.
 *
 * @returns The unchanged input when it is valid.
 * @throws Error when the input contains anything else (or is empty).
 */
function validateRelationType(rel: string): string {
  const identifierPattern = /^[A-Za-z_][A-Za-z0-9_]*$/;
  if (identifierPattern.test(rel)) {
    return rel;
  }
  throw new Error(`Invalid relation type: ${rel}`);
}
|
||||
|
||||
/**
|
||||
* Semantic Memory - Higher-level API for AI memory applications
|
||||
*
|
||||
|
|
@ -328,8 +530,10 @@ export class SemanticMemory {
|
|||
}
|
||||
|
||||
// Also store as graph node
|
||||
const safeKey = sanitizeCypher(key);
|
||||
const safeContent = sanitizeCypher(content);
|
||||
await this.db.cypher(
|
||||
`CREATE (m:Memory {key: "${key}", content: "${content.replace(/"/g, '\\"')}", timestamp: ${Date.now()}})`
|
||||
`CREATE (m:Memory {key: "${safeKey}", content: "${safeContent}", timestamp: ${Date.now()}})`
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -361,8 +565,11 @@ export class SemanticMemory {
|
|||
relation: string,
|
||||
toKey: string
|
||||
): Promise<void> {
|
||||
const safeFrom = sanitizeCypher(fromKey);
|
||||
const safeTo = sanitizeCypher(toKey);
|
||||
const safeRel = validateRelationType(relation);
|
||||
await this.db.cypher(
|
||||
`MATCH (a:Memory {key: "${fromKey}"}), (b:Memory {key: "${toKey}"}) CREATE (a)-[:${relation}]->(b)`
|
||||
`MATCH (a:Memory {key: "${safeFrom}"}), (b:Memory {key: "${safeTo}"}) CREATE (a)-[:${safeRel}]->(b)`
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -370,10 +577,340 @@ export class SemanticMemory {
|
|||
* Find related memories through graph traversal
|
||||
*/
|
||||
async findRelated(key: string, depth: number = 2): Promise<QueryResult> {
  const safeKey = sanitizeCypher(key);

  // The depth is interpolated into the query text, so it must end up as an
  // integer in [1, 10]. A value that is not a number would otherwise travel
  // through floor/min/max as NaN and produce `[*1..NaN]`; fall back to the
  // default depth in that case.
  const floored = Math.floor(depth);
  const safeDepth = Math.max(1, Math.min(10, Number.isNaN(floored) ? 2 : floored));

  return this.db.cypher(
    `MATCH (m:Memory {key: "${safeKey}"})-[*1..${safeDepth}]-(related:Memory) RETURN DISTINCT related`
  );
}
|
||||
}
|
||||
|
||||
// ── RVF File Envelope ────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Shape of the JSON-based RVF file written when `@ruvector/rvf-wasm` is not
 * available: the exported database state plus a few header fields that make
 * the file self-describing.
 */
export interface RvfFileEnvelope {
  /** Version of the RVF format (currently 1). */
  rvf_version: number;
  /** Format marker; always the literal "RVF1". */
  magic: 'RVF1';
  /** Creation time of the file as an ISO-8601 string. */
  created_at: string;
  /** Dimensionality of the vectors stored in the file. */
  dimensions: number;
  /** Name of the distance metric in use. */
  distance_metric: string;
  /** Complete database state, as returned by `exportJson()`. */
  payload: unknown;
}
|
||||
|
||||
// ── Browser Writer Lease ─────────────────────────────────────────────────
|
||||
|
||||
/**
 * Browser-side writer lease that uses IndexedDB for lock coordination.
 *
 * Only one writer may hold the lease for a given `storeId` at a time. The
 * lease is a record `{ id, holder, ts }` in the `_rvlite_locks` database.
 * The holder refreshes `ts` every 10 seconds (heartbeat) so that other
 * tabs / windows can detect stale leases (default threshold 30 seconds).
 *
 * Every state change (claim, heartbeat refresh, release) reads and writes
 * the record inside a single `readwrite` transaction, so two contexts
 * cannot both observe a free lease and both claim it, and a holder never
 * refreshes or deletes a record that another holder has taken over.
 *
 * On `beforeunload` only the heartbeat is stopped; the record itself is left
 * to expire through staleness detection.
 */
export class BrowserWriterLease {
  private heartbeatInterval: ReturnType<typeof setInterval> | null = null;
  private storeId: string | null = null;
  private _holderId: string | null = null;
  private unloadHandler: (() => void) | null = null;

  private static readonly DB_NAME = '_rvlite_locks';
  private static readonly STORE_NAME = 'locks';
  private static readonly HEARTBEAT_MS = 10_000;
  private static readonly DEFAULT_STALE_MS = 30_000;

  // ---- helpers ----

  /** Open (and on first use create) the lock database. */
  private static openDb(): Promise<IDBDatabase> {
    return new Promise((resolve, reject) => {
      const req = indexedDB.open(BrowserWriterLease.DB_NAME, 1);
      req.onupgradeneeded = () => {
        const db = req.result;
        if (!db.objectStoreNames.contains(BrowserWriterLease.STORE_NAME)) {
          db.createObjectStore(BrowserWriterLease.STORE_NAME, { keyPath: 'id' });
        }
      };
      req.onsuccess = () => resolve(req.result);
      req.onerror = () => reject(req.error);
    });
  }

  /** Read the lock record stored under `key` (undefined when absent). */
  private static idbGet(db: IDBDatabase, key: string): Promise<any> {
    return new Promise((resolve, reject) => {
      const tx = db.transaction(BrowserWriterLease.STORE_NAME, 'readonly');
      const store = tx.objectStore(BrowserWriterLease.STORE_NAME);
      const req = store.get(key);
      req.onsuccess = () => resolve(req.result);
      req.onerror = () => reject(req.error);
    });
  }

  /**
   * Read the record under `key` and let `decide` act on it, all within one
   * readwrite transaction. `decide` receives the current record and the
   * object store, may issue a `put` / `delete`, and returns whether it did.
   * The promise settles only once the transaction has completed or failed.
   */
  private static transact(
    db: IDBDatabase,
    key: string,
    decide: (current: any, store: IDBObjectStore) => boolean
  ): Promise<boolean> {
    return new Promise((resolve, reject) => {
      const tx = db.transaction(BrowserWriterLease.STORE_NAME, 'readwrite');
      const store = tx.objectStore(BrowserWriterLease.STORE_NAME);
      let changed = false;

      const req = store.get(key);
      req.onsuccess = () => {
        changed = decide(req.result, store);
      };

      tx.oncomplete = () => resolve(changed);
      tx.onerror = () => reject(tx.error);
      tx.onabort = () => reject(tx.error);
    });
  }

  // ---- public API ----

  /**
   * Try to acquire the writer lease for the given store.
   *
   * Retries every 200 ms until the lease is free or stale, or until the
   * timeout elapses.
   *
   * @param storeId - Unique identifier for the rvlite store being locked.
   * @param timeout - Maximum time in ms to wait for the lease (default 5000).
   * @returns `true` if the lease was acquired, `false` on timeout.
   * @throws Error when IndexedDB is not available.
   */
  async acquire(storeId: string, timeout: number = 5000): Promise<boolean> {
    if (typeof indexedDB === 'undefined') {
      throw new Error('BrowserWriterLease requires IndexedDB');
    }

    const deadline = Date.now() + timeout;
    const holder = this.holderId();
    const db = await BrowserWriterLease.openDb();

    try {
      while (Date.now() < deadline) {
        // Check and claim atomically: the record is only written when it is
        // absent or its last heartbeat is older than the stale threshold.
        const claimed = await BrowserWriterLease.transact(db, storeId, (current, store) => {
          const takeable =
            !current || Date.now() - current.ts > BrowserWriterLease.DEFAULT_STALE_MS;
          if (!takeable) return false;
          store.put({ id: storeId, holder, ts: Date.now() });
          return true;
        });

        if (claimed) {
          this.storeId = storeId;
          this.startHeartbeat();
          this.registerUnloadHandler();
          return true;
        }

        // Back off before retrying.
        await new Promise(r => setTimeout(r, 200));
      }

      return false;
    } finally {
      db.close();
    }
  }

  /**
   * Release the currently held lease.
   *
   * Stops the heartbeat and removes the lock record, but only if the record
   * still names this instance as holder. Failures are ignored (best effort).
   */
  async release(): Promise<void> {
    this.stopHeartbeat();
    this.unregisterUnloadHandler();

    const storeId = this.storeId;
    if (storeId === null) return;

    const holder = this.holderId();
    try {
      const db = await BrowserWriterLease.openDb();
      try {
        await BrowserWriterLease.transact(db, storeId, (current, store) => {
          if (!current || current.holder !== holder) return false;
          store.delete(storeId);
          return true;
        });
      } finally {
        db.close();
      }
    } catch {
      // Best-effort release.
    }

    this.storeId = null;
  }

  /**
   * Check whether the lease for `storeId` is stale (the holder has stopped
   * sending heartbeats). A missing record, or an environment without
   * IndexedDB, also counts as stale.
   *
   * @param storeId - Store identifier.
   * @param thresholdMs - Staleness threshold (default 30 000 ms).
   */
  static async isStale(
    storeId: string,
    thresholdMs: number = BrowserWriterLease.DEFAULT_STALE_MS
  ): Promise<boolean> {
    if (typeof indexedDB === 'undefined') return true;

    const db = await BrowserWriterLease.openDb();
    try {
      const record = await BrowserWriterLease.idbGet(db, storeId);
      if (!record) return true;
      return Date.now() - record.ts > thresholdMs;
    } finally {
      db.close();
    }
  }

  // ---- private helpers ----

  /** Lazily generated identifier naming this instance in lock records. */
  private holderId(): string {
    if (!this._holderId) {
      this._holderId = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
    }
    return this._holderId;
  }

  /** Start refreshing the lock record's timestamp every HEARTBEAT_MS. */
  private startHeartbeat(): void {
    this.stopHeartbeat();
    const storeId = this.storeId!;
    const holder = this.holderId();

    const beat = async () => {
      try {
        const db = await BrowserWriterLease.openDb();
        try {
          // Refresh only while the record is still ours; if it vanished or
          // was taken over, writing it back would steal or resurrect a lease.
          const stillOurs = await BrowserWriterLease.transact(db, storeId, (current, store) => {
            if (!current || current.holder !== holder) return false;
            store.put({ id: storeId, holder, ts: Date.now() });
            return true;
          });
          if (!stillOurs && this.heartbeatInterval === handle) {
            this.stopHeartbeat();
          }
        } finally {
          db.close();
        }
      } catch {
        // Heartbeat failures are non-fatal.
      }
    };

    const handle = setInterval(beat, BrowserWriterLease.HEARTBEAT_MS);
    this.heartbeatInterval = handle;
  }

  /** Stop the heartbeat timer if one is running. */
  private stopHeartbeat(): void {
    if (this.heartbeatInterval !== null) {
      clearInterval(this.heartbeatInterval);
      this.heartbeatInterval = null;
    }
  }

  /** Stop the heartbeat when the page unloads (registered at most once). */
  private registerUnloadHandler(): void {
    if (this.unloadHandler !== null) return;
    if (typeof globalThis.addEventListener === 'function') {
      const handler = () => {
        this.unloadHandler = null;
        this.stopHeartbeat();
        // Synchronous best-effort release — IndexedDB is unavailable during
        // unload in some browsers so we just stop the heartbeat, letting the
        // lease expire via staleness detection.
      };
      this.unloadHandler = handler;
      globalThis.addEventListener('beforeunload', handler, { once: true });
    }
  }

  /** Remove the unload listener installed by `registerUnloadHandler`. */
  private unregisterUnloadHandler(): void {
    if (this.unloadHandler === null) return;
    if (typeof globalThis.removeEventListener === 'function') {
      globalThis.removeEventListener('beforeunload', this.unloadHandler);
    }
    this.unloadHandler = null;
  }
}
|
||||
|
||||
// ── Epoch Sync ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Snapshot of how the RVF vector-store epoch relates to the metadata
 * (SQL / Cypher / SPARQL) epoch.
 */
export interface EpochState {
  /** Epoch counter of the RVF vector store. */
  rvfEpoch: number;
  /** Epoch counter of the metadata stores. */
  metadataEpoch: number;
  /**
   * Relationship between the two counters: `'synchronized'` when equal,
   * `'rvf_ahead'` when the RVF epoch is larger, `'metadata_ahead'` otherwise.
   */
  status: 'synchronized' | 'rvf_ahead' | 'metadata_ahead';
}
|
||||
|
||||
/**
 * Read the two epoch counters of an RvLite instance and classify them.
 *
 * The counters live in the database itself, as the `epoch` metadata field of
 * the entries `_rvlite_rvf_epoch` and `_rvlite_metadata_epoch`. A missing
 * entry or field counts as epoch 0.
 *
 * @param db - An initialised RvLite instance.
 * @returns Both epochs together with their sync status.
 */
export async function checkEpochSync(db: RvLite): Promise<EpochState> {
  const rvfRecord = await db.get('_rvlite_rvf_epoch');
  const metadataRecord = await db.get('_rvlite_metadata_epoch');

  const rvfEpoch = (rvfRecord?.metadata?.epoch as number) ?? 0;
  const metadataEpoch = (metadataRecord?.metadata?.epoch as number) ?? 0;

  const status: EpochState['status'] =
    rvfEpoch === metadataEpoch
      ? 'synchronized'
      : rvfEpoch > metadataEpoch
        ? 'rvf_ahead'
        : 'metadata_ahead';

  return { rvfEpoch, metadataEpoch, status };
}
|
||||
|
||||
/**
 * Bring both epoch counters to the larger of the two values.
 *
 * - **rvf_ahead**: the metadata epoch is raised to the RVF epoch.
 * - **metadata_ahead**: the RVF epoch is raised to the metadata epoch.
 * - **synchronized**: nothing is written.
 *
 * Both sentinel entries are deleted (errors ignored) and then re-inserted
 * with the target epoch.
 *
 * @param db - An initialised RvLite instance.
 * @param state - The epoch state (as returned by `checkEpochSync`).
 */
export async function reconcileEpochs(
  db: RvLite,
  state: EpochState
): Promise<void> {
  if (state.status !== 'synchronized') {
    const sentinelIds = ['_rvlite_rvf_epoch', '_rvlite_metadata_epoch'];
    const epoch = Math.max(state.rvfEpoch, state.metadataEpoch);
    // NOTE(review): assumes the store accepts a 1-element vector for these
    // sentinel entries regardless of its configured dimensions — confirm.
    const placeholder = [0];

    // Remove any previous sentinels first; insertWithId keeps the ids
    // deterministic when they are written back.
    for (const id of sentinelIds) {
      try {
        await db.delete(id);
      } catch {
        /* may not exist */
      }
    }

    for (const id of sentinelIds) {
      await db.insertWithId(id, placeholder, { epoch });
    }
  }
}
|
||||
|
||||
/**
 * Advance the RVF epoch by one and store it.
 *
 * Intended to be called after every successful vector-store mutation.
 *
 * @param db - An initialised RvLite instance.
 * @returns The new RVF epoch.
 */
export async function bumpRvfEpoch(db: RvLite): Promise<number> {
  const sentinelId = '_rvlite_rvf_epoch';
  const { rvfEpoch } = await checkEpochSync(db);
  const bumped = rvfEpoch + 1;

  // Replace the sentinel entry: drop the old one (if any), then re-insert.
  try {
    await db.delete(sentinelId);
  } catch {
    /* ignore */
  }
  await db.insertWithId(sentinelId, [0], { epoch: bumped });

  return bumped;
}
|
||||
|
||||
/**
 * Advance the metadata epoch by one and store it.
 *
 * Intended to be called after every successful metadata mutation
 * (SQL / Cypher / SPARQL).
 *
 * @param db - An initialised RvLite instance.
 * @returns The new metadata epoch.
 */
export async function bumpMetadataEpoch(db: RvLite): Promise<number> {
  const sentinelId = '_rvlite_metadata_epoch';
  const { metadataEpoch } = await checkEpochSync(db);
  const bumped = metadataEpoch + 1;

  // Replace the sentinel entry: drop the old one (if any), then re-insert.
  try {
    await db.delete(sentinelId);
  } catch {
    /* ignore */
  }
  await db.insertWithId(sentinelId, [0], { epoch: bumped });

  return bumped;
}
|
||||
|
||||
export default RvLite;
|
||||
|
|
|
|||
318
tests/rvf-integration/smoke-test.js
Normal file
318
tests/rvf-integration/smoke-test.js
Normal file
|
|
@ -0,0 +1,318 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* End-to-end RVF CLI smoke test.
|
||||
*
|
||||
* Tests the full lifecycle via `npx ruvector rvf` CLI commands:
|
||||
* create -> ingest -> query -> restart simulation -> query -> verify match
|
||||
*
|
||||
* Exits with code 0 on success, code 1 on failure.
|
||||
*
|
||||
* Usage:
|
||||
* node tests/rvf-integration/smoke-test.js
|
||||
*/
|
||||
|
||||
'use strict';
|
||||
|
||||
const { execFileSync } = require('child_process');
|
||||
const fs = require('fs');
|
||||
const os = require('os');
|
||||
const path = require('path');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const DIM = 128;
|
||||
const METRIC = 'cosine';
|
||||
const VECTOR_COUNT = 20;
|
||||
const K = 5;
|
||||
|
||||
// Locate the CLI entry point relative to the repo root.
|
||||
const REPO_ROOT = path.resolve(__dirname, '..', '..');
|
||||
const CLI_PATH = path.join(REPO_ROOT, 'npm', 'packages', 'ruvector', 'bin', 'cli.js');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Filesystem locations assigned by setup(); tmpDir is removed by teardown().
let tmpDir, storePath, inputPath, childPath;

// Running tallies maintained by assert() / assertThrows() and reported by main().
let passed = 0, failed = 0;
|
||||
|
||||
/**
 * Deterministic pseudo-random unit vector built from a 64-bit LCG.
 *
 * Each raw component is `(state >> 33) / 4294967295 - 0.5`; the vector is then
 * scaled to unit length so it is suitable for cosine queries. When the norm is
 * not greater than 1e-8 every component is returned as 0.
 *
 * @param {number} dim  Number of components to generate.
 * @param {number} seed Initial LCG state (reduced to 64 bits).
 * @returns {number[]} Plain array of `dim` components.
 */
function randomVector(dim, seed) {
  const MASK_64 = 0xFFFFFFFFFFFFFFFFn;
  const raw = new Float64Array(dim);

  let state = BigInt(seed) & MASK_64;
  for (let i = 0; i < dim; i++) {
    state = (state * 6364136223846793005n + 1442695040888963407n) & MASK_64;
    raw[i] = Number(state >> 33n) / 4294967295.0 - 0.5;
  }

  // Normalize for cosine.
  const norm = Math.sqrt(raw.reduce((acc, c) => acc + c * c, 0));
  const usable = norm > 1e-8;
  return Array.from(raw, (c) => (usable ? c / norm : 0));
}
|
||||
|
||||
/**
 * Run an `rvf` CLI sub-command synchronously and return its trimmed stdout.
 *
 * The CLI is launched with the same Node.js binary that is running this script
 * (`process.execPath`) rather than whatever `node` happens to be first on PATH,
 * so the test does not depend on the caller's PATH.
 *
 * @param {string[]} args Arguments placed after `rvf` on the command line.
 * @param {object} [opts] Extra `execFileSync` options; these override the defaults.
 * @returns {string} Trimmed stdout of the command.
 * @throws {Error} If the command fails (non-zero exit, timeout, spawn error); the
 *   message contains the exit status plus captured stdout and stderr.
 */
function runCli(args, opts = {}) {
  const cliArgs = [CLI_PATH, 'rvf', ...args];
  try {
    const stdout = execFileSync(process.execPath, cliArgs, {
      cwd: REPO_ROOT,
      timeout: 30000,
      encoding: 'utf8',
      env: {
        ...process.env,
        // Disable chalk colors for easier parsing.
        FORCE_COLOR: '0',
        NO_COLOR: '1',
      },
      ...opts,
    });
    return stdout.trim();
  } catch (e) {
    const stderr = e.stderr ? e.stderr.toString().trim() : '';
    const stdout = e.stdout ? e.stdout.toString().trim() : '';
    throw new Error(
      `CLI failed (exit ${e.status}): ${args.join(' ')}\n` +
      ` stdout: ${stdout}\n` +
      ` stderr: ${stderr}`
    );
  }
}
|
||||
|
||||
/**
 * Record the outcome of a single check.
 *
 * Increments `passed` or `failed` and prints a PASS/FAIL line; it never throws,
 * so later checks still run after a failure.
 *
 * @param {*} condition Truthy when the check succeeded.
 * @param {string} message Description printed alongside the outcome.
 */
function assert(condition, message) {
  if (!condition) {
    failed++;
    console.error(` FAIL: ${message}`);
    return;
  }
  passed++;
  console.log(` PASS: ${message}`);
}
|
||||
|
||||
/**
 * Record a pass when `fn` throws and a failure when it returns normally.
 *
 * Used for CLI invocations that are expected to fail.
 *
 * @param {Function} fn Callback expected to throw.
 * @param {string} message Description printed alongside the outcome.
 */
function assertThrows(fn, message) {
  let threw = false;
  try {
    fn();
  } catch (_e) {
    threw = true;
  }

  if (threw) {
    passed++;
    console.log(` PASS: ${message}`);
  } else {
    failed++;
    console.error(` FAIL: ${message} (expected error, got success)`);
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Setup
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Create a fresh temp directory, derive the store/input/child paths inside it,
 * and write VECTOR_COUNT generated vectors to the JSON ingest file.
 *
 * Vector ids are 1-based and each vector is seeded with `id * 17 + 5`.
 */
function setup() {
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rvf-smoke-'));
  storePath = path.join(tmpDir, 'smoke.rvf');
  inputPath = path.join(tmpDir, 'vectors.json');
  childPath = path.join(tmpDir, 'child.rvf');

  // Generate input vectors as JSON.
  const entries = Array.from({ length: VECTOR_COUNT }, (_unused, index) => {
    const id = index + 1;
    return { id, vector: randomVector(DIM, id * 17 + 5) };
  });
  fs.writeFileSync(inputPath, JSON.stringify(entries));
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Teardown
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Remove the temp directory created by setup(), if any.
 *
 * Cleanup is best-effort: any filesystem error is swallowed so that it cannot
 * mask the real test outcome.
 */
function teardown() {
  if (!tmpDir) return;
  try {
    if (fs.existsSync(tmpDir)) {
      fs.rmSync(tmpDir, { recursive: true, force: true });
    }
  } catch (_e) {
    // Best-effort cleanup.
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test steps
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Step 1: `rvf create` reports success and leaves a store file on disk. */
function testCreate() {
  console.log('\nStep 1: Create store');
  const createArgs = ['create', storePath, '-d', String(DIM), '-m', METRIC];
  const output = runCli(createArgs);
  const reportedSuccess = ['Created', 'created'].some((word) => output.includes(word));
  assert(reportedSuccess, 'create reports success');
  assert(fs.existsSync(storePath), 'store file exists on disk');
}
|
||||
|
||||
/** Step 2: `rvf ingest` of the generated JSON file reports ingested/accepted vectors. */
function testIngest() {
  console.log('\nStep 2: Ingest vectors');
  const output = runCli(['ingest', storePath, '-i', inputPath]);
  const reportedIngest = ['Ingested', 'accepted'].some((word) => output.includes(word));
  assert(reportedIngest, 'ingest reports accepted vectors');
}
|
||||
|
||||
/**
 * Step 3: query the store with one of the ingested vectors and check that the
 * reported result count is between 1 and K.
 *
 * @returns {string} Raw CLI output of the query command.
 */
function testQueryFirst() {
  console.log('\nStep 3: Query (first pass)');

  // setup() seeds the vector for each id with `id * 17 + 5`, so seed
  // 9 * 17 + 5 = 158 reproduces the ingested vector with id=9.
  const queryId = 9;
  const queryVec = randomVector(DIM, queryId * 17 + 5);
  const vecStr = queryVec.map(v => v.toFixed(8)).join(',');
  const output = runCli(['query', storePath, '-v', vecStr, '-k', String(K)]);
  assert(output.includes('result'), 'query returns results');

  // Parse result count.
  const countMatch = output.match(/(\d+)\s*result/);
  if (!countMatch) {
    assert(false, 'could not parse result count from output');
    return output;
  }

  const count = parseInt(countMatch[1], 10);
  assert(count > 0, `query returned ${count} results (> 0)`);
  assert(count <= K, `query returned ${count} results (<= ${K})`);
  return output;
}
|
||||
|
||||
/** Step 4: `rvf status` output mentions the vector-count field (snake or camel case). */
function testStatus() {
  console.log('\nStep 4: Status check');
  const output = runCli(['status', storePath]);
  const showsCount = ['total_vectors', 'totalVectors'].some((key) => output.includes(key));
  assert(showsCount, 'status shows vector count');
}
|
||||
|
||||
/** Step 5: `rvf segments` on the parent store prints a segment listing. */
function testSegments() {
  console.log('\nStep 5: Segment listing');
  const output = runCli(['segments', storePath]);
  const listsSegments = ['segment', 'type='].some((marker) => output.includes(marker));
  assert(listsSegments, 'segments command lists segments');
}
|
||||
|
||||
/** Step 6: `rvf compact` reports completion. */
function testCompact() {
  console.log('\nStep 6: Compact');
  const output = runCli(['compact', storePath]);
  const reportedCompact = ['Compact', 'compact'].some((word) => output.includes(word));
  assert(reportedCompact, 'compact reports completion');
}
|
||||
|
||||
/** Step 7: `rvf derive` reports success and creates the child store file. */
function testDerive() {
  console.log('\nStep 7: Derive child store');
  const output = runCli(['derive', storePath, childPath]);
  const reportedDerive = ['Derived', 'derived'].some((word) => output.includes(word));
  assert(reportedDerive, 'derive reports success');
  assert(fs.existsSync(childPath), 'child store file exists on disk');
}
|
||||
|
||||
/** Step 8: `rvf segments` on the derived child store prints a segment listing. */
function testChildSegments() {
  console.log('\nStep 8: Child segment listing');
  const output = runCli(['segments', childPath]);
  const listsSegments = ['segment', 'type='].some((marker) => output.includes(marker));
  assert(listsSegments, 'child segments command lists segments');
}
|
||||
|
||||
/** Step 9: `rvf status` still produces output after compact and derive. */
function testStatusAfterLifecycle() {
  console.log('\nStep 9: Final status check');
  const statusOutput = runCli(['status', storePath]);
  const hasOutput = statusOutput.length > 0;
  assert(hasOutput, 'status returns non-empty output');
}
|
||||
|
||||
/**
 * Step 10: `rvf export` either reports success or leaves an output file; when
 * the file exists, its JSON must carry `status` and `segments` entries.
 */
function testExport() {
  console.log('\nStep 10: Export');
  const exportPath = path.join(tmpDir, 'export.json');
  const output = runCli(['export', storePath, '-o', exportPath]);

  const exportSeen = /[Ee]xported/.test(output) || fs.existsSync(exportPath);
  assert(exportSeen, 'export produces output file');

  // The content checks only apply when the CLI actually wrote the file.
  if (!fs.existsSync(exportPath)) return;

  const data = JSON.parse(fs.readFileSync(exportPath, 'utf8'));
  assert(data.status !== undefined, 'export contains status');
  assert(data.segments !== undefined, 'export contains segments');
}
|
||||
|
||||
/**
 * Step 11: `rvf status` on a store that does not exist must fail.
 *
 * The missing path is placed inside the freshly created temp directory, so it
 * is guaranteed not to exist and does not rely on a POSIX-style `/tmp`.
 */
function testNonexistentStore() {
  console.log('\nStep 11: Error handling');
  const missingStore = path.join(tmpDir, 'nonexistent_smoke_test_rvf_99999.rvf');
  assertThrows(
    () => runCli(['status', missingStore]),
    'status on nonexistent store fails with error'
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Entry point: run every smoke-test step in order, print a summary, and exit
 * with code 0 on success or 1 when any check failed.
 *
 * When the CLI has not been built the run is reported as skipped (exit 0).
 * That check happens before setup() because `process.exit()` does not run
 * pending `finally` blocks, so exiting after setup() would leak the temp
 * directory. setup() itself runs inside the `try` so a partial setup is
 * still cleaned up by teardown().
 */
function main() {
  console.log('=== RVF CLI End-to-End Smoke Test ===');
  console.log(` DIM=${DIM} METRIC=${METRIC} VECTORS=${VECTOR_COUNT} K=${K}`);

  // Check if CLI exists before creating any temp files or running tests.
  if (!fs.existsSync(CLI_PATH)) {
    console.error(`\nCLI not found at: ${CLI_PATH}`);
    console.error('Skipping CLI smoke test (CLI not built).');
    console.log('\n=== SKIPPED (CLI not available) ===');
    process.exit(0);
    return;
  }

  try {
    setup();

    testCreate();
    testIngest();
    testQueryFirst();
    testStatus();
    testSegments();
    testCompact();
    testDerive();
    testChildSegments();
    testStatusAfterLifecycle();
    testExport();
    testNonexistentStore();
  } catch (e) {
    // If any step throws unexpectedly, we still want to report and clean up.
    failed++;
    console.error(`\nUNEXPECTED ERROR: ${e.message}`);
    if (e.stack) console.error(e.stack);
  } finally {
    teardown();
  }

  // Summary.
  const total = passed + failed;
  console.log(`\n=== Results: ${passed}/${total} passed, ${failed} failed ===`);

  if (failed > 0) {
    process.exit(1);
  } else {
    console.log('All smoke tests passed.');
    process.exit(0);
  }
}
|
||||
|
||||
main();
|
||||
606
tests/rvf-integration/tests/rvf_smoke_test.rs
Normal file
606
tests/rvf-integration/tests/rvf_smoke_test.rs
Normal file
|
|
@ -0,0 +1,606 @@
|
|||
//! End-to-end RVF smoke test -- full lifecycle verification.
|
||||
//!
|
||||
//! Exercises the complete RVF pipeline through 15 steps:
|
||||
//!  1. Create a new store (dim=128; L2 metric in the lifecycle test -- see NOTE below)
|
||||
//! 2. Ingest 100 random vectors with metadata
|
||||
//! 3. Query for 10 nearest neighbors of a known vector
|
||||
//!  4. Verify results are sorted and distances are valid (non-negative for L2; 0.0..=2.0 for cosine in the dedicated test)
|
||||
//! 5. Close the store
|
||||
//! 6. Reopen the store (simulating process restart)
|
||||
//! 7. Query again with the same vector
|
||||
//! 8. Verify results match the first query exactly (persistence verified)
|
||||
//! 9. Delete some vectors
|
||||
//! 10. Compact the store
|
||||
//! 11. Verify deleted vectors no longer appear in results
|
||||
//! 12. Derive a child store
|
||||
//! 13. Verify child can be queried independently
|
||||
//! 14. Verify segment listing works on both parent and child
|
||||
//! 15. Clean up temporary files
|
||||
//!
|
||||
//! NOTE: The `DistanceMetric` is not persisted in the manifest, so after
|
||||
//! `RvfStore::open()` the metric defaults to L2. The lifecycle test therefore
|
||||
//! uses L2 for the cross-restart comparison (steps 5-8), while cosine-specific
|
||||
//! assertions are exercised in a dedicated single-session test.
|
||||
|
||||
use rvf_runtime::options::{
|
||||
DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
|
||||
};
|
||||
use rvf_runtime::RvfStore;
|
||||
use rvf_types::DerivationType;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Deterministic pseudo-random vector generation using a 64-bit LCG.
///
/// Each component is `(state >> 33) as f32 / u32::MAX as f32 - 0.5`. Because the
/// shift leaves only 31 bits, the ratio never exceeds 0.5 and every component
/// lies in `[-0.5, 0.0]`. The same `(dim, seed)` pair always yields the same
/// vector, which the tests rely on to regenerate an ingested vector as a query.
fn random_vector(dim: usize, seed: u64) -> Vec<f32> {
    const MULTIPLIER: u64 = 6364136223846793005;
    const INCREMENT: u64 = 1442695040888963407;

    let mut state = seed;
    std::iter::repeat_with(|| {
        state = state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
        ((state >> 33) as f32) / (u32::MAX as f32) - 0.5
    })
    .take(dim)
    .collect()
}
|
||||
|
||||
/// Scale `v` in place to unit L2 length so cosine distance is well-defined.
///
/// Vectors whose norm is not greater than `f32::EPSILON` (including empty and
/// all-zero vectors) are left untouched to avoid dividing by (near) zero.
fn normalize(v: &mut [f32]) {
    let squared_sum: f32 = v.iter().map(|c| c * c).sum();
    let norm = squared_sum.sqrt();
    if norm > f32::EPSILON {
        v.iter_mut().for_each(|c| *c /= norm);
    }
}
|
||||
|
||||
/// Generate a normalized random vector suitable for cosine queries.
|
||||
fn random_unit_vector(dim: usize, seed: u64) -> Vec<f32> {
|
||||
let mut v = random_vector(dim, seed);
|
||||
normalize(&mut v);
|
||||
v
|
||||
}
|
||||
|
||||
fn make_options(dim: u16, metric: DistanceMetric) -> RvfOptions {
|
||||
RvfOptions {
|
||||
dimension: dim,
|
||||
metric,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Full lifecycle smoke test (L2 metric for cross-restart consistency)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Full lifecycle: create -> ingest -> query -> close/reopen -> query ->
/// delete -> compact -> derive -> segment listing -> cleanup.
///
/// The whole test runs with the L2 metric so that distances computed before
/// and after a reopen are directly comparable.
#[test]
fn rvf_smoke_full_lifecycle() {
    let dir = TempDir::new().expect("failed to create temp dir");
    let store_path = dir.path().join("smoke_lifecycle.rvf");
    let child_path = dir.path().join("smoke_child.rvf");

    let dim: u16 = 128;
    let k: usize = 10;
    let vector_count: usize = 100;

    // Use L2 metric for the lifecycle test because the metric is not persisted
    // in the manifest. After reopen, the store defaults to L2, so using L2
    // throughout ensures cross-restart distance comparisons are exact.
    let options = make_options(dim, DistanceMetric::L2);

    // -----------------------------------------------------------------------
    // Step 1: Create a new RVF store with dimension 128 and L2 metric
    // -----------------------------------------------------------------------
    let mut store = RvfStore::create(&store_path, options.clone())
        .expect("step 1: failed to create store");

    // Verify initial state.
    let initial_status = store.status();
    assert_eq!(initial_status.total_vectors, 0, "step 1: new store should be empty");
    assert!(!initial_status.read_only, "step 1: new store should not be read-only");

    // -----------------------------------------------------------------------
    // Step 2: Ingest 100 random vectors with metadata
    // -----------------------------------------------------------------------
    // Vector at index i (id = i + 1) is seeded with i * 17 + 5.
    let vectors: Vec<Vec<f32>> = (0..vector_count as u64)
        .map(|i| random_vector(dim as usize, i * 17 + 5))
        .collect();
    let vec_refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
    let ids: Vec<u64> = (1..=vector_count as u64).collect();

    // One metadata entry per vector: field_id=0, value=category string.
    let metadata: Vec<MetadataEntry> = ids
        .iter()
        .map(|&id| MetadataEntry {
            field_id: 0,
            value: MetadataValue::String(format!("group_{}", id % 5)),
        })
        .collect();

    let ingest_result = store
        .ingest_batch(&vec_refs, &ids, Some(&metadata))
        .expect("step 2: ingest failed");

    assert_eq!(
        ingest_result.accepted, vector_count as u64,
        "step 2: all {} vectors should be accepted",
        vector_count,
    );
    assert_eq!(ingest_result.rejected, 0, "step 2: no vectors should be rejected");
    assert!(ingest_result.epoch > 0, "step 2: epoch should advance after ingest");

    // -----------------------------------------------------------------------
    // Step 3: Query for 10 nearest neighbors of a known vector
    // -----------------------------------------------------------------------
    // Use vector with id=50 as the query (seed = 49 * 17 + 5 = 838).
    let query_vec = random_vector(dim as usize, 49 * 17 + 5);
    let results_first = store
        .query(&query_vec, k, &QueryOptions::default())
        .expect("step 3: query failed");

    assert_eq!(
        results_first.len(),
        k,
        "step 3: should return exactly {} results",
        k,
    );

    // The first result should be the exact match (id=50).
    assert_eq!(
        results_first[0].id, 50,
        "step 3: exact match vector should be first result",
    );
    assert!(
        results_first[0].distance < 1e-5,
        "step 3: exact match distance should be near zero, got {}",
        results_first[0].distance,
    );

    // -----------------------------------------------------------------------
    // Step 4: Verify results are sorted by distance and distances are valid
    //         (L2 distances are non-negative)
    // -----------------------------------------------------------------------
    for (idx, pair) in results_first.windows(2).enumerate() {
        assert!(
            pair[1].distance >= pair[0].distance,
            "step 4: results not sorted at position {}: {} > {}",
            idx + 1,
            pair[0].distance,
            pair[1].distance,
        );
    }
    for r in &results_first {
        assert!(
            r.distance >= 0.0,
            "step 4: L2 distance {} should be non-negative",
            r.distance,
        );
    }

    // -----------------------------------------------------------------------
    // Step 5: Close the store
    // -----------------------------------------------------------------------
    store.close().expect("step 5: close failed");

    // -----------------------------------------------------------------------
    // Step 6: Reopen the store (simulating process restart)
    // -----------------------------------------------------------------------
    let store = RvfStore::open(&store_path).expect("step 6: reopen failed");
    let reopen_status = store.status();
    assert_eq!(
        reopen_status.total_vectors, vector_count as u64,
        "step 6: all {} vectors should persist after reopen",
        vector_count,
    );

    // -----------------------------------------------------------------------
    // Step 7: Query again with the same vector
    // -----------------------------------------------------------------------
    let results_second = store
        .query(&query_vec, k, &QueryOptions::default())
        .expect("step 7: query after reopen failed");

    assert_eq!(
        results_second.len(),
        k,
        "step 7: should return exactly {} results after reopen",
        k,
    );

    // -----------------------------------------------------------------------
    // Step 8: Verify results match the first query exactly (persistence)
    //
    // After reopen, the internal iteration order of vectors may differ, which
    // can affect tie-breaking in the k-NN heap. We therefore compare:
    //   (a) the set of result IDs must be identical,
    //   (b) distances for each ID must match within floating-point tolerance,
    //   (c) result count must be the same.
    // -----------------------------------------------------------------------
    assert_eq!(
        results_first.len(),
        results_second.len(),
        "step 8: result count should match across restart",
    );

    // Build a map of id -> distance for comparison.
    let first_map: std::collections::HashMap<u64, f32> = results_first
        .iter()
        .map(|r| (r.id, r.distance))
        .collect();
    let second_map: std::collections::HashMap<u64, f32> = results_second
        .iter()
        .map(|r| (r.id, r.distance))
        .collect();

    // Verify the exact same IDs appear in both result sets.
    let mut first_ids: Vec<u64> = first_map.keys().copied().collect();
    let mut second_ids: Vec<u64> = second_map.keys().copied().collect();
    first_ids.sort_unstable();
    second_ids.sort_unstable();
    assert_eq!(
        first_ids, second_ids,
        "step 8: result ID sets must match across restart",
    );

    // Verify distances match per-ID within tolerance.
    for &id in &first_ids {
        let d1 = first_map[&id];
        let d2 = second_map[&id];
        assert!(
            (d1 - d2).abs() < 1e-5,
            "step 8: distance mismatch for id={}: {} vs {} (pre vs post restart)",
            id, d1, d2,
        );
    }

    // Delete/compact need a `mut` binding. Close the current handle and open
    // the store once more, which also exercises a second restart.
    store.close().expect("step 8: close for mutable reopen failed");
    let mut store = RvfStore::open(&store_path).expect("step 8: mutable reopen failed");

    // -----------------------------------------------------------------------
    // Step 9: Delete some vectors (ids 1..=10)
    // -----------------------------------------------------------------------
    let delete_ids: Vec<u64> = (1..=10).collect();
    let del_result = store
        .delete(&delete_ids)
        .expect("step 9: delete failed");

    assert_eq!(
        del_result.deleted, 10,
        "step 9: should have deleted 10 vectors",
    );
    assert!(
        del_result.epoch > reopen_status.current_epoch,
        "step 9: epoch should advance after delete",
    );

    // Quick verification: deleted vectors should not appear in query.
    let post_delete_results = store
        .query(&query_vec, vector_count, &QueryOptions::default())
        .expect("step 9: post-delete query failed");

    for r in &post_delete_results {
        assert!(
            r.id > 10,
            "step 9: deleted vector {} should not appear in results",
            r.id,
        );
    }
    assert_eq!(
        post_delete_results.len(),
        vector_count - 10,
        "step 9: should have {} results after deleting 10",
        vector_count - 10,
    );

    // -----------------------------------------------------------------------
    // Step 10: Compact the store
    // -----------------------------------------------------------------------
    let pre_compact_epoch = store.status().current_epoch;
    let compact_result = store.compact().expect("step 10: compact failed");

    assert!(
        compact_result.segments_compacted > 0 || compact_result.bytes_reclaimed > 0,
        "step 10: compaction should reclaim space",
    );
    assert!(
        compact_result.epoch > pre_compact_epoch,
        "step 10: epoch should advance after compact",
    );

    // -----------------------------------------------------------------------
    // Step 11: Verify deleted vectors no longer appear in results
    // -----------------------------------------------------------------------
    let post_compact_results = store
        .query(&query_vec, vector_count, &QueryOptions::default())
        .expect("step 11: post-compact query failed");

    for r in &post_compact_results {
        assert!(
            r.id > 10,
            "step 11: deleted vector {} appeared after compaction",
            r.id,
        );
    }
    assert_eq!(
        post_compact_results.len(),
        vector_count - 10,
        "step 11: should still have {} results post-compact",
        vector_count - 10,
    );

    // Verify post-compact status.
    let post_compact_status = store.status();
    assert_eq!(
        post_compact_status.total_vectors,
        (vector_count - 10) as u64,
        "step 11: status should reflect {} live vectors",
        vector_count - 10,
    );

    // -----------------------------------------------------------------------
    // Step 12: Derive a child store
    // -----------------------------------------------------------------------
    // `options` is not needed afterwards, so it is moved rather than cloned.
    let child = store
        .derive(&child_path, DerivationType::Clone, Some(options))
        .expect("step 12: derive failed");

    // Verify lineage.
    assert_eq!(
        child.lineage_depth(),
        1,
        "step 12: child lineage depth should be 1",
    );
    assert_eq!(
        child.parent_id(),
        store.file_id(),
        "step 12: child parent_id should match parent file_id",
    );
    assert_ne!(
        child.file_id(),
        store.file_id(),
        "step 12: child should have a distinct file_id",
    );

    // -----------------------------------------------------------------------
    // Step 13: Verify child can be queried independently
    // -----------------------------------------------------------------------
    // This test expects derive() to record lineage only, without copying the
    // parent's vectors, so a query against the child must succeed and return
    // nothing.
    let child_query = random_vector(dim as usize, 999);
    let child_results = child
        .query(&child_query, k, &QueryOptions::default())
        .expect("step 13: child query failed");

    assert!(
        child_results.is_empty(),
        "step 13: freshly derived child should have no vectors, got {}",
        child_results.len(),
    );

    // -----------------------------------------------------------------------
    // Step 14: Verify segment listing works on both parent and child
    // -----------------------------------------------------------------------
    let parent_segments = store.segment_dir();
    assert!(
        !parent_segments.is_empty(),
        "step 14: parent should have at least one segment",
    );

    let child_segments = child.segment_dir();
    assert!(
        !child_segments.is_empty(),
        "step 14: child should have at least one segment (manifest)",
    );

    // Verify segment tuples have valid structure (seg_id > 0, type byte > 0).
    for &(seg_id, _offset, _len, seg_type) in parent_segments {
        assert!(seg_id > 0, "step 14: parent segment ID should be > 0");
        assert!(seg_type > 0, "step 14: parent segment type should be > 0");
    }
    for &(seg_id, _offset, _len, seg_type) in child_segments {
        assert!(seg_id > 0, "step 14: child segment ID should be > 0");
        assert!(seg_type > 0, "step 14: child segment type should be > 0");
    }

    // -----------------------------------------------------------------------
    // Step 15: Clean up temporary files
    // -----------------------------------------------------------------------
    child.close().expect("step 15: child close failed");
    store.close().expect("step 15: parent close failed");

    // TempDir's Drop impl will remove the directory, but verify the files exist
    // before cleanup happens.
    assert!(
        store_path.exists(),
        "step 15: parent store file should exist before cleanup",
    );
    assert!(
        child_path.exists(),
        "step 15: child store file should exist before cleanup",
    );

    // Explicitly drop the TempDir to trigger cleanup.
    drop(dir);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Additional focused smoke tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Cosine-metric check: with unit-length vectors every reported distance must
/// lie in `[0.0, 2.0]` and results must come back in ascending distance order.
///
/// Everything happens in one session (no close/reopen) because the metric is
/// not persisted across restarts.
#[test]
fn smoke_cosine_distance_range() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("cosine_range.rvf");

    let dim: u16 = 128;
    let mut store = RvfStore::create(&path, make_options(dim, DistanceMetric::Cosine)).unwrap();

    // Ingest 50 normalized vectors.
    let vectors: Vec<Vec<f32>> = (0..50)
        .map(|i| random_unit_vector(dim as usize, i * 31 + 3))
        .collect();
    let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
    let ids: Vec<u64> = (1..=50).collect();
    store.ingest_batch(&refs, &ids, None).unwrap();

    // Query with several different vectors and verify distance range.
    for seed in [0, 42, 100, 999, 12345] {
        let q = random_unit_vector(dim as usize, seed);
        let results = store.query(&q, 50, &QueryOptions::default()).unwrap();

        for hit in &results {
            assert!(
                (0.0..=2.0).contains(&hit.distance),
                "cosine distance {} out of range [0.0, 2.0] for seed {}",
                hit.distance,
                seed,
            );
        }

        // Verify sorting by comparing each neighbouring pair.
        for (idx, pair) in results.windows(2).enumerate() {
            assert!(
                pair[1].distance >= pair[0].distance,
                "results not sorted for seed {}: {} > {} at position {}",
                seed,
                pair[0].distance,
                pair[1].distance,
                idx + 1,
            );
        }
    }

    store.close().unwrap();
}
|
||||
|
||||
/// Verify persistence across multiple close/reopen cycles with interleaved
/// ingests and deletes. Uses L2 metric for cross-restart consistency.
///
/// Cycles: (1) create + ingest 50, (2) reopen + ingest 50 + delete 10,
/// (3) reopen + compact + query, (4) read-only reopen.
#[test]
fn smoke_multi_restart_persistence() {
    // IDs deleted in cycle 2 and checked for absence in cycle 3. Kept in one
    // place so the delete call and the verification cannot drift apart.
    const DELETED_IDS: [u64; 10] = [5, 10, 15, 20, 25, 55, 60, 65, 70, 75];

    let dir = TempDir::new().unwrap();
    let path = dir.path().join("multi_restart.rvf");
    let dim: u16 = 128;

    let options = make_options(dim, DistanceMetric::L2);

    // Cycle 1: create and ingest 50 vectors.
    {
        let mut store = RvfStore::create(&path, options.clone()).unwrap();
        let vectors: Vec<Vec<f32>> = (0..50)
            .map(|i| random_vector(dim as usize, i))
            .collect();
        let refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let ids: Vec<u64> = (1..=50).collect();
        store.ingest_batch(&refs, &ids, None).unwrap();
        assert_eq!(store.status().total_vectors, 50);
        store.close().unwrap();
    }

    // Cycle 2: reopen, ingest 50 more, delete 10, close.
    {
        let mut store = RvfStore::open(&path).unwrap();
        assert_eq!(store.status().total_vectors, 50);

        let vectors: Vec<Vec<f32>> = (50..100)
            .map(|i| random_vector(dim as usize, i))
            .collect();
        let refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let ids: Vec<u64> = (51..=100).collect();
        store.ingest_batch(&refs, &ids, None).unwrap();
        assert_eq!(store.status().total_vectors, 100);

        store.delete(&DELETED_IDS).unwrap();
        assert_eq!(store.status().total_vectors, 90);

        store.close().unwrap();
    }

    // Cycle 3: reopen, verify counts, compact, close.
    {
        let mut store = RvfStore::open(&path).unwrap();
        assert_eq!(
            store.status().total_vectors, 90,
            "cycle 3: 90 vectors should survive two restarts",
        );

        store.compact().unwrap();
        assert_eq!(store.status().total_vectors, 90);

        // Verify no deleted IDs appear in a full query.
        let q = random_vector(dim as usize, 42);
        let results = store.query(&q, 100, &QueryOptions::default()).unwrap();

        // Without this, the absence check below would pass vacuously if the
        // query returned nothing at all.
        assert_eq!(
            results.len(),
            100 - DELETED_IDS.len(),
            "cycle 3: full query should return every live vector",
        );
        for r in &results {
            assert!(
                !DELETED_IDS.contains(&r.id),
                "cycle 3: deleted vector {} appeared after compact + restart",
                r.id,
            );
        }

        store.close().unwrap();
    }

    // Cycle 4: final reopen (readonly), verify persistence survived compact.
    {
        let store = RvfStore::open_readonly(&path).unwrap();
        assert_eq!(
            store.status().total_vectors, 90,
            "cycle 4: 90 vectors should survive compact + restart",
        );
        assert!(store.status().read_only);
    }
}
|
||||
|
||||
/// Smoke test: batch-ingest 100 vectors with one metadata entry apiece, then
/// confirm that a nearest-neighbour lookup for a known vector reports the
/// expected ID.
#[test]
fn smoke_metadata_and_ids() {
    let dim: u16 = 128;
    let width = dim as usize;

    let tmp = TempDir::new().unwrap();
    let store_path = tmp.path().join("meta_ids.rvf");
    let mut store =
        RvfStore::create(&store_path, make_options(dim, DistanceMetric::L2)).unwrap();

    // Build the batch: vector at index i is seeded with i * 7 + 1 and is
    // stored under ID i + 1.
    let ids: Vec<u64> = (1..=100).collect();
    let vectors: Vec<Vec<f32>> = (0..100)
        .map(|i| random_vector(width, i * 7 + 1))
        .collect();
    let slices: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();

    // One metadata entry per vector: field 0 carries the vector's own ID.
    let mut metadata: Vec<MetadataEntry> = Vec::with_capacity(ids.len());
    for id in ids.iter().copied() {
        metadata.push(MetadataEntry {
            field_id: 0,
            value: MetadataValue::U64(id),
        });
    }

    let outcome = store.ingest_batch(&slices, &ids, Some(&metadata)).unwrap();
    assert_eq!(outcome.accepted, 100);
    assert_eq!(outcome.rejected, 0);

    // Regenerate the vector stored at index 41 (ID 42) and look it up; the
    // single nearest hit must be that very vector at near-zero distance.
    let probe = random_vector(width, 41 * 7 + 1);
    let hits = store.query(&probe, 1, &QueryOptions::default()).unwrap();
    assert_eq!(hits.len(), 1);

    let best = &hits[0];
    assert_eq!(best.id, 42, "exact match should be id=42");
    assert!(best.distance < 1e-5);

    store.close().unwrap();
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue