From e2350b759ff4e1c3bbacfaac7462b7d8bca1be21 Mon Sep 17 00:00:00 2001 From: rUv Date: Sat, 23 May 2026 03:37:35 -0400 Subject: [PATCH] fix(core): HNSW correctness fixes, k=0 guard, sorted results, cross-integration helpers (v2.2.3) (#502) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(core): correctness + safety fixes in HNSW/flat index + cross-integration helpers (v2.2.3) Correctness fixes: - hnsw: `DistanceFn::eval` now clamps distance to 0.0 — prevents hnsw_rs internal BinaryHeap assertion panic when floating-point rounding yields a marginally-negative cosine/euclidean distance for near-identical vectors - hnsw: `set_ef_search` was a silent no-op; now correctly writes to `config.ef_search` so callers can tune recall at query time - hnsw: `search_with_ef` clamps `ef_search` to `max(ef_search, k)` to prevent silent under-recall when ef_search < k (hnsw_rs constraint) - hnsw: `search_with_ef` now explicitly returns an empty slice for k=0 instead of forwarding to hnsw_rs which may panic - hnsw: `search_with_ef` returns early (empty slice) when index is empty to avoid hnsw_rs BinaryHeap `.peek().unwrap()` panic on zero-element index - hnsw: results are now explicitly sorted by ascending distance; hnsw_rs does not guarantee this order in all code paths - hnsw: deserialization rebuilds the HNSW graph in index order (sorted by idx) and uses an O(n) HashMap lookup instead of O(n^2) linear search over the vectors vec during restore - flat: added k=0 guard (returns empty slice, no panic) - flat: switched sort to `sort_unstable_by` with a `partial_cmp` fallback to handle NaN distances gracefully and improve throughput on large sets API improvement: - types: `HnswConfig::default()` now uses `max_elements=1_000_000` (was 10_000_000) and `m=16/ef_construction=100` to avoid excessive upfront memory allocation in the common case; large-index callers can still set `max_elements` explicitly New module: - integration: `FannAdapter` 
and `SemanticSearchAdapter` — thin wrappers that make ruvector-core directly usable from ruv-FANN (layer-embedding storage + retrieval) and sparc (semantic file search by embedding query). Includes `normalize()` and `cosine_similarity()` free-standing utilities. Tests (4 new integration, 3 new unit): - test_hnsw_search_k_zero: k=0 returns empty, no panic - test_hnsw_results_sorted_ascending: verifies window[i].score <= window[i+1].score - test_hnsw_set_ef_search_updates_config: set_ef_search writes through to config - test_hnsw_search_with_ef_clamps_to_k: ef < k still returns results - flat: test_flat_index_k_zero, test_flat_index_results_sorted - integration: FannAdapter and SemanticSearchAdapter roundtrip tests Version bump: 2.2.2 → 2.2.3 Co-Authored-By: claude-flow * style: cargo fmt ruvector-core --- Cargo.lock | 254 +++++------ Cargo.toml | 2 +- crates/ruvector-core/src/index/flat.rs | 45 +- crates/ruvector-core/src/index/hnsw.rs | 80 +++- crates/ruvector-core/src/integration.rs | 413 ++++++++++++++++++ crates/ruvector-core/src/lib.rs | 6 + crates/ruvector-core/src/types.rs | 9 +- .../tests/hnsw_integration_test.rs | 113 +++++ 8 files changed, 771 insertions(+), 151 deletions(-) create mode 100644 crates/ruvector-core/src/integration.rs diff --git a/Cargo.lock b/Cargo.lock index 8a7ac700..518ebf5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -883,7 +883,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -892,7 +892,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -1306,7 +1306,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -1341,7 +1341,7 @@ dependencies = [ "criterion 0.5.1", "libm", "proptest", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ 
-2418,7 +2418,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -2886,7 +2886,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -3861,7 +3861,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -4472,7 +4472,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -4969,7 +4969,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -5053,12 +5053,12 @@ dependencies = [ "ruvector-consciousness", "ruvector-delta-core", "ruvector-domain-expansion", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", "ruvector-nervous-system", "ruvector-solver", "ruvector-sona 0.2.0", "ruvector-sparsifier", - "ruvllm 2.2.2", + "ruvllm 2.2.3", "rvf-crypto", "rvf-federation", "rvf-runtime", @@ -5410,7 +5410,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -6397,7 +6397,7 @@ dependencies = [ "ruqu-algorithms", "ruvector-attention", "ruvector-cluster", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-delta-core", "ruvector-filter", "ruvector-gnn", @@ -6451,7 +6451,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -7060,11 +7060,11 @@ dependencies = [ "rkyv", "roaring", "ruvector-attention", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-gnn", "ruvector-graph", "ruvector-hyperbolic-hnsw", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", "ruvector-nervous-system", "ruvector-raft", "ruvector-sona 0.2.0", @@ 
-7989,7 +7989,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -8076,7 +8076,7 @@ dependencies = [ "ndarray 0.16.1", "rand 0.8.5", "rand_distr 0.4.3", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "thiserror 2.0.18", @@ -8320,7 +8320,7 @@ dependencies = [ [[package]] name = "ruqu" -version = "2.2.2" +version = "2.2.3" dependencies = [ "blake3", "cognitum-gate-tilezero 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -8586,7 +8586,7 @@ dependencies = [ [[package]] name = "ruvector-acorn" -version = "2.2.2" +version = "2.2.3" dependencies = [ "criterion 0.5.1", "rand 0.8.5", @@ -8609,7 +8609,7 @@ dependencies = [ [[package]] name = "ruvector-attention" -version = "2.2.2" +version = "2.2.3" dependencies = [ "approx", "criterion 0.5.1", @@ -8624,7 +8624,7 @@ dependencies = [ [[package]] name = "ruvector-attention-node" -version = "2.2.2" +version = "2.2.3" dependencies = [ "napi", "napi-build", @@ -8656,7 +8656,7 @@ dependencies = [ [[package]] name = "ruvector-attention-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "console_error_panic_hook", "getrandom 0.2.17", @@ -8671,7 +8671,7 @@ dependencies = [ [[package]] name = "ruvector-attn-mincut" -version = "2.2.2" +version = "2.2.3" dependencies = [ "serde", "serde_json", @@ -8680,7 +8680,7 @@ dependencies = [ [[package]] name = "ruvector-bench" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "byteorder", @@ -8701,8 +8701,8 @@ dependencies = [ "rayon", "ruvector-cognitive-container", "ruvector-coherence", - "ruvector-core 2.2.2", - "ruvector-mincut 2.2.2", + "ruvector-core 2.2.3", + "ruvector-mincut 2.2.3", "serde", "serde_json", "statistical", @@ -8731,7 +8731,7 @@ dependencies = [ "rand_distr 0.4.3", "rayon", "reqwest 0.12.28", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "rvf-crypto", "rvf-types", "rvf-wire", @@ -8748,7 +8748,7 @@ 
dependencies = [ [[package]] name = "ruvector-cli" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "assert_cmd", @@ -8773,7 +8773,7 @@ dependencies = [ "predicates", "prettytable-rs", "rand 0.8.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-gnn", "ruvector-graph", "serde", @@ -8806,7 +8806,7 @@ dependencies = [ "rand_distr 0.4.3", "rayon", "ruvector-attention", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-gnn", "ruvector-graph", "serde", @@ -8822,7 +8822,7 @@ dependencies = [ [[package]] name = "ruvector-cluster" -version = "2.2.2" +version = "2.2.3" dependencies = [ "async-trait", "bincode 2.0.1", @@ -8831,7 +8831,7 @@ dependencies = [ "futures", "parking_lot 0.12.5", "rand 0.8.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "thiserror 2.0.18", @@ -8842,7 +8842,7 @@ dependencies = [ [[package]] name = "ruvector-cnn" -version = "2.2.2" +version = "2.2.3" dependencies = [ "criterion 0.5.1", "fastrand", @@ -8870,7 +8870,7 @@ dependencies = [ [[package]] name = "ruvector-cognitive-container" -version = "2.2.2" +version = "2.2.3" dependencies = [ "proptest", "serde", @@ -8880,7 +8880,7 @@ dependencies = [ [[package]] name = "ruvector-coherence" -version = "2.2.2" +version = "2.2.3" dependencies = [ "serde", "serde_json", @@ -8888,14 +8888,14 @@ dependencies = [ [[package]] name = "ruvector-collections" -version = "2.2.2" +version = "2.2.3" dependencies = [ "bincode 2.0.1", "chrono", "criterion 0.5.1", "dashmap 6.1.0", "parking_lot 0.12.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "thiserror 2.0.18", @@ -8904,7 +8904,7 @@ dependencies = [ [[package]] name = "ruvector-consciousness" -version = "2.2.2" +version = "2.2.3" dependencies = [ "approx", "criterion 0.5.1", @@ -8916,7 +8916,7 @@ dependencies = [ "ruvector-cognitive-container", "ruvector-coherence", "ruvector-math", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", "ruvector-solver", 
"ruvector-sparsifier", "serde", @@ -8926,7 +8926,7 @@ dependencies = [ [[package]] name = "ruvector-consciousness-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "getrandom 0.2.17", "js-sys", @@ -8992,7 +8992,7 @@ dependencies = [ [[package]] name = "ruvector-core" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "bincode 2.0.1", @@ -9033,7 +9033,7 @@ dependencies = [ "approx", "ruvector-attention", "ruvector-gnn", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", "serde", "serde_json", "thiserror 1.0.69", @@ -9041,7 +9041,7 @@ dependencies = [ [[package]] name = "ruvector-dag" -version = "2.2.2" +version = "2.2.3" dependencies = [ "criterion 0.5.1", "crossbeam", @@ -9053,7 +9053,7 @@ dependencies = [ "pqcrypto-kyber", "proptest", "rand 0.8.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "sha2 0.10.9", @@ -9078,7 +9078,7 @@ dependencies = [ [[package]] name = "ruvector-decompiler" -version = "2.2.2" +version = "2.2.3" dependencies = [ "criterion 0.5.1", "memchr", @@ -9087,7 +9087,7 @@ dependencies = [ "ort", "rayon", "regex", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", "serde", "serde_json", "sha3", @@ -9096,7 +9096,7 @@ dependencies = [ [[package]] name = "ruvector-decompiler-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "console_error_panic_hook", "getrandom 0.2.17", @@ -9200,7 +9200,7 @@ dependencies = [ [[package]] name = "ruvector-diskann" -version = "2.2.2" +version = "2.2.3" dependencies = [ "bincode 2.0.1", "bytemuck", @@ -9217,7 +9217,7 @@ dependencies = [ [[package]] name = "ruvector-diskann-node" -version = "2.2.2" +version = "2.2.3" dependencies = [ "napi", "napi-build", @@ -9238,7 +9238,7 @@ dependencies = [ [[package]] name = "ruvector-domain-expansion" -version = "2.2.2" +version = "2.2.3" dependencies = [ "criterion 0.5.1", "proptest", @@ -9281,7 +9281,7 @@ dependencies = [ [[package]] name = "ruvector-exotic-wasm" -version = "2.2.2" +version = "2.2.3" 
dependencies = [ "console_error_panic_hook", "getrandom 0.2.17", @@ -9297,12 +9297,12 @@ dependencies = [ [[package]] name = "ruvector-filter" -version = "2.2.2" +version = "2.2.3" dependencies = [ "chrono", "dashmap 6.1.0", "ordered-float", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "thiserror 2.0.18", @@ -9348,7 +9348,7 @@ dependencies = [ [[package]] name = "ruvector-gnn" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "criterion 0.5.1", @@ -9364,7 +9364,7 @@ dependencies = [ "rand 0.8.5", "rand_distr 0.4.3", "rayon", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "tempfile", @@ -9373,7 +9373,7 @@ dependencies = [ [[package]] name = "ruvector-gnn-node" -version = "2.2.2" +version = "2.2.3" dependencies = [ "napi", "napi-build", @@ -9384,7 +9384,7 @@ dependencies = [ [[package]] name = "ruvector-gnn-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "console_error_panic_hook", "getrandom 0.2.17", @@ -9399,7 +9399,7 @@ dependencies = [ [[package]] name = "ruvector-graph" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "bincode 2.0.1", @@ -9439,7 +9439,7 @@ dependencies = [ "rkyv", "roaring", "ruvector-cluster", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-raft", "ruvector-replication", "serde", @@ -9460,14 +9460,14 @@ dependencies = [ [[package]] name = "ruvector-graph-node" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "futures", "napi", "napi-build", "napi-derive", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-graph", "serde", "serde_json", @@ -9479,14 +9479,14 @@ dependencies = [ [[package]] name = "ruvector-graph-transformer" -version = "2.2.2" +version = "2.2.3" dependencies = [ "proptest", "rand 0.8.5", "ruvector-attention", "ruvector-coherence", "ruvector-gnn", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", "ruvector-solver", "ruvector-verified", "serde", @@ -9495,7 +9495,7 @@ dependencies = [ [[package]] 
name = "ruvector-graph-transformer-node" -version = "2.2.2" +version = "2.2.3" dependencies = [ "napi", "napi-build", @@ -9507,7 +9507,7 @@ dependencies = [ [[package]] name = "ruvector-graph-transformer-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "js-sys", "serde", @@ -9519,7 +9519,7 @@ dependencies = [ [[package]] name = "ruvector-graph-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "console_error_panic_hook", @@ -9528,7 +9528,7 @@ dependencies = [ "js-sys", "parking_lot 0.12.5", "regex", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-graph", "serde", "serde-wasm-bindgen", @@ -9553,7 +9553,7 @@ dependencies = [ "criterion 0.5.1", "hailort-sys", "proptest", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde_json", "sha2 0.10.9", "thiserror 2.0.18", @@ -9573,10 +9573,10 @@ dependencies = [ "prost", "protoc-bin-vendored", "rcgen", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-hailo", "ruvector-mmwave", - "ruvllm 2.2.2", + "ruvllm 2.2.3", "serde", "serde_json", "sha2 0.10.9", @@ -9641,7 +9641,7 @@ dependencies = [ [[package]] name = "ruvector-math" -version = "2.2.2" +version = "2.2.3" dependencies = [ "approx", "criterion 0.5.1", @@ -9656,7 +9656,7 @@ dependencies = [ [[package]] name = "ruvector-math-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "console_error_panic_hook", "getrandom 0.2.17", @@ -9674,7 +9674,7 @@ dependencies = [ [[package]] name = "ruvector-metrics" -version = "2.2.2" +version = "2.2.3" dependencies = [ "chrono", "lazy_static", @@ -9729,7 +9729,7 @@ dependencies = [ [[package]] name = "ruvector-mincut" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "criterion 0.5.1", @@ -9743,7 +9743,7 @@ dependencies = [ "rand 0.8.5", "rayon", "roaring", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-graph", "serde", "serde_json", @@ -9788,24 +9788,24 @@ dependencies = [ [[package]] name = "ruvector-mincut-node" -version = "2.2.2" +version = 
"2.2.3" dependencies = [ "napi", "napi-build", "napi-derive", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", "serde", "serde_json", ] [[package]] name = "ruvector-mincut-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "console_error_panic_hook", "getrandom 0.2.17", "js-sys", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", "serde", "serde-wasm-bindgen", "serde_json", @@ -9819,7 +9819,7 @@ version = "0.0.1" [[package]] name = "ruvector-nervous-system" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "approx", @@ -9853,14 +9853,14 @@ dependencies = [ [[package]] name = "ruvector-node" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "napi", "napi-build", "napi-derive", "ruvector-collections", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-filter", "ruvector-metrics", "serde", @@ -9872,7 +9872,7 @@ dependencies = [ [[package]] name = "ruvector-profiler" -version = "2.2.2" +version = "2.2.3" dependencies = [ "serde", "serde_json", @@ -9881,7 +9881,7 @@ dependencies = [ [[package]] name = "ruvector-rabitq" -version = "2.2.2" +version = "2.2.3" dependencies = [ "criterion 0.5.1", "rand 0.8.5", @@ -9908,7 +9908,7 @@ dependencies = [ [[package]] name = "ruvector-raft" -version = "2.2.2" +version = "2.2.3" dependencies = [ "bincode 2.0.1", "chrono", @@ -9916,7 +9916,7 @@ dependencies = [ "futures", "parking_lot 0.12.5", "rand 0.8.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "thiserror 2.0.18", @@ -9936,7 +9936,7 @@ dependencies = [ [[package]] name = "ruvector-replication" -version = "2.2.2" +version = "2.2.3" dependencies = [ "bincode 2.0.1", "chrono", @@ -9944,7 +9944,7 @@ dependencies = [ "futures", "parking_lot 0.12.5", "rand 0.8.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "thiserror 2.0.18", @@ -9979,7 +9979,7 @@ dependencies = [ [[package]] name = "ruvector-router-cli" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", 
"chrono", @@ -9994,7 +9994,7 @@ dependencies = [ [[package]] name = "ruvector-router-core" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "bincode 2.0.1", @@ -10021,7 +10021,7 @@ dependencies = [ [[package]] name = "ruvector-router-ffi" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "chrono", @@ -10036,7 +10036,7 @@ dependencies = [ [[package]] name = "ruvector-router-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "js-sys", "ruvector-router-core", @@ -10050,7 +10050,7 @@ dependencies = [ [[package]] name = "ruvector-rulake" -version = "2.2.2" +version = "2.2.3" dependencies = [ "hex", "rand 0.8.5", @@ -10065,7 +10065,7 @@ dependencies = [ [[package]] name = "ruvector-scipix" -version = "2.2.2" +version = "2.2.3" dependencies = [ "ab_glyph", "anyhow", @@ -10138,12 +10138,12 @@ dependencies = [ [[package]] name = "ruvector-server" -version = "2.2.2" +version = "2.2.3" dependencies = [ "axum 0.7.9", "dashmap 6.1.0", "parking_lot 0.12.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "thiserror 2.0.18", @@ -10156,13 +10156,13 @@ dependencies = [ [[package]] name = "ruvector-snapshot" -version = "2.2.2" +version = "2.2.3" dependencies = [ "async-trait", "bincode 2.0.1", "chrono", "flate2", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "sha2 0.10.9", @@ -10173,7 +10173,7 @@ dependencies = [ [[package]] name = "ruvector-solver" -version = "2.2.2" +version = "2.2.3" dependencies = [ "approx", "criterion 0.5.1", @@ -10192,7 +10192,7 @@ dependencies = [ [[package]] name = "ruvector-solver-node" -version = "2.2.2" +version = "2.2.3" dependencies = [ "napi", "napi-build", @@ -10205,7 +10205,7 @@ dependencies = [ [[package]] name = "ruvector-solver-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "getrandom 0.2.17", "js-sys", @@ -10255,7 +10255,7 @@ dependencies = [ [[package]] name = "ruvector-sparse-inference" -version = "2.2.2" +version = "2.2.3" dependencies = 
[ "anyhow", "byteorder", @@ -10278,7 +10278,7 @@ dependencies = [ [[package]] name = "ruvector-sparsifier" -version = "2.2.2" +version = "2.2.3" dependencies = [ "approx", "criterion 0.5.1", @@ -10296,7 +10296,7 @@ dependencies = [ [[package]] name = "ruvector-sparsifier-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "console_error_panic_hook", "getrandom 0.2.17", @@ -10311,11 +10311,11 @@ dependencies = [ [[package]] name = "ruvector-temporal-tensor" -version = "2.2.2" +version = "2.2.3" [[package]] name = "ruvector-tiny-dancer-core" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "bytemuck", @@ -10345,7 +10345,7 @@ dependencies = [ [[package]] name = "ruvector-tiny-dancer-node" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "chrono", @@ -10362,7 +10362,7 @@ dependencies = [ [[package]] name = "ruvector-tiny-dancer-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "js-sys", "ruvector-tiny-dancer-core", @@ -10383,7 +10383,7 @@ dependencies = [ "proptest", "ruvector-cognitive-container", "ruvector-coherence", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "serde", "serde_json", "thiserror 2.0.18", @@ -10405,7 +10405,7 @@ dependencies = [ [[package]] name = "ruvector-wasm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "base64 0.22.1", @@ -10418,7 +10418,7 @@ dependencies = [ "parking_lot 0.12.5", "rand 0.8.5", "ruvector-collections", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-filter", "serde", "serde-wasm-bindgen", @@ -10650,7 +10650,7 @@ dependencies = [ [[package]] name = "ruvllm" -version = "2.2.2" +version = "2.2.3" dependencies = [ "anyhow", "async-trait", @@ -10680,7 +10680,7 @@ dependencies = [ "rayon", "regex", "ruvector-attention", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-gnn", "ruvector-graph", "ruvector-sona 0.2.0", @@ -10700,7 +10700,7 @@ dependencies = [ [[package]] name = "ruvllm-cli" -version = "2.2.2" +version = "2.2.3" dependencies = 
[ "anyhow", "assert_cmd", @@ -10720,7 +10720,7 @@ dependencies = [ "predicates", "prettytable-rs", "rustyline", - "ruvllm 2.2.2", + "ruvllm 2.2.3", "serde", "serde_json", "tempfile", @@ -11053,7 +11053,7 @@ dependencies = [ "rand_distr 0.4.3", "ruvector-attention", "ruvector-collections", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-dag", "ruvector-filter", "ruvector-gnn", @@ -11167,7 +11167,7 @@ dependencies = [ "js-sys", "once_cell", "parking_lot 0.12.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "rvf-runtime", "rvf-types", "serde", @@ -11258,7 +11258,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -11267,7 +11267,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -11406,7 +11406,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -11415,7 +11415,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -12033,7 +12033,7 @@ name = "subpolynomial-time-mincut-demo" version = "0.1.0" dependencies = [ "rand 0.8.5", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -12256,7 +12256,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -12949,7 +12949,7 @@ name = "train-discoveries" version = "0.1.0" dependencies = [ "rand 0.8.5", - "ruvector-core 2.2.2", + "ruvector-core 2.2.3", "ruvector-solver", "serde", "serde_json", @@ -13369,7 +13369,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", "ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] @@ -13635,7 +13635,7 @@ version = "0.1.0" dependencies = [ "rand 0.8.5", 
"ruvector-coherence", - "ruvector-mincut 2.2.2", + "ruvector-mincut 2.2.3", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4853cc70..83fa2ca2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -237,7 +237,7 @@ members = [ resolver = "2" [workspace.package] -version = "2.2.2" +version = "2.2.3" edition = "2021" rust-version = "1.77" license = "MIT" diff --git a/crates/ruvector-core/src/index/flat.rs b/crates/ruvector-core/src/index/flat.rs index b2595b47..cedc63b9 100644 --- a/crates/ruvector-core/src/index/flat.rs +++ b/crates/ruvector-core/src/index/flat.rs @@ -34,6 +34,10 @@ impl VectorIndex for FlatIndex { } fn search(&self, query: &[f32], k: usize) -> Result> { + if k == 0 { + return Ok(vec![]); + } + // Distance calculation - parallel on native, sequential on WASM #[cfg(all(feature = "parallel", not(target_arch = "wasm32")))] let mut results: Vec<_> = self @@ -60,8 +64,9 @@ impl VectorIndex for FlatIndex { }) .collect::>>()?; - // Sort by distance and take top k - results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + // Sort by distance (ascending — closest first) and take top k. + // Use sort_unstable_by for better performance on large result sets. 
+ results.sort_unstable_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)); results.truncate(k); Ok(results @@ -105,4 +110,40 @@ mod tests { Ok(()) } + + #[test] + fn test_flat_index_k_zero() -> Result<()> { + let mut index = FlatIndex::new(3, DistanceMetric::Euclidean); + index.add("v1".to_string(), vec![1.0, 0.0, 0.0])?; + + let results = index.search(&[1.0, 0.0, 0.0], 0)?; + assert!(results.is_empty(), "k=0 must return empty results"); + + Ok(()) + } + + #[test] + fn test_flat_index_results_sorted() -> Result<()> { + let mut index = FlatIndex::new(3, DistanceMetric::Euclidean); + + // Insert vectors at various distances from origin + for i in 1usize..=10 { + index.add(format!("v{}", i), vec![i as f32, 0.0, 0.0])?; + } + + let query = vec![0.0, 0.0, 0.0]; + let results = index.search(&query, 5)?; + + assert_eq!(results.len(), 5); + for window in results.windows(2) { + assert!( + window[0].score <= window[1].score, + "Results must be sorted ascending by distance" + ); + } + // Closest is v1 (distance=1) + assert_eq!(results[0].id, "v1"); + + Ok(()) + } } diff --git a/crates/ruvector-core/src/index/hnsw.rs b/crates/ruvector-core/src/index/hnsw.rs index 83985cd7..b2822b1e 100644 --- a/crates/ruvector-core/src/index/hnsw.rs +++ b/crates/ruvector-core/src/index/hnsw.rs @@ -23,7 +23,11 @@ impl DistanceFn { impl Distance for DistanceFn { fn eval(&self, a: &[f32], b: &[f32]) -> f32 { - distance(a, b, self.metric).unwrap_or(f32::MAX) + // hnsw_rs asserts `dist_to_ref >= 0` in its search loop. Clamp any + // tiny negative values caused by floating-point rounding (e.g. cosine + // distance between two nearly-identical normalised vectors can be + // marginally below zero). f32::MAX is the safe sentinel for errors. 
+ distance(a, b, self.metric).unwrap_or(f32::MAX).max(0.0) } } @@ -126,10 +130,12 @@ impl HnswIndex { &self.config } - /// Set efSearch parameter for query-time accuracy tuning - pub fn set_ef_search(&mut self, _ef_search: usize) { - // Note: hnsw_rs controls ef_search via the search method's knbn parameter - // We store it in config and use it in search_with_ef + /// Set efSearch parameter for query-time accuracy tuning. + /// + /// Higher values increase recall at the cost of search latency. + /// Typical range: 50–500. Must be >= k for meaningful results. + pub fn set_ef_search(&mut self, ef_search: usize) { + self.config.ef_search = ef_search; } /// Serialize the index to bytes using bincode @@ -197,17 +203,27 @@ impl HnswIndex { distance_fn, ); - // Rebuild the index by inserting all vectors + // Rebuild the index by inserting all vectors. + // Build a HashMap first to avoid O(n^2) linear search in the loop below. + let vectors_lookup: std::collections::HashMap<&str, &Vec> = state + .vectors + .iter() + .map(|(id, v)| (id.as_str(), v)) + .collect(); + let id_to_idx: DashMap = state.id_to_idx.into_iter().collect(); let idx_to_id: DashMap = state.idx_to_id.into_iter().collect(); - // Insert vectors into HNSW in order - for entry in idx_to_id.iter() { - let idx = *entry.key(); - let id = entry.value(); - if let Some(vector) = state.vectors.iter().find(|(vid, _)| vid == id) { - // Use insert_data method with slice and idx - hnsw.insert_data(&vector.1, idx); + // Insert vectors into HNSW in index order for deterministic reconstruction. + let mut sorted_entries: Vec<_> = idx_to_id + .iter() + .map(|e| (*e.key(), e.value().clone())) + .collect(); + sorted_entries.sort_unstable_by_key(|(idx, _)| *idx); + + for (idx, id) in &sorted_entries { + if let Some(vector) = vectors_lookup.get(id.as_str()) { + hnsw.insert_data(vector, *idx); } } @@ -227,7 +243,11 @@ impl HnswIndex { }) } - /// Search with custom efSearch parameter + /// Search with custom efSearch parameter. 
+ /// + /// `ef_search` must be >= `k`; values smaller than `k` are clamped to `k` + /// to avoid silent under-recall. Results are returned sorted by ascending + /// distance (closest first). pub fn search_with_ef( &self, query: &[f32], @@ -241,12 +261,27 @@ impl HnswIndex { }); } + if k == 0 { + return Ok(vec![]); + } + let inner = self.inner.read(); - // Use HNSW search with custom ef parameter (knbn) - let neighbors = inner.hnsw.search(query, k, ef_search); + // hnsw_rs panics in its BinaryHeap traversal when the index is empty + // or contains only a single element (.peek().unwrap() without an + // emptiness guard). Return an empty result for an empty index instead + // of panicking; NOTE: the single-element case is not guarded here. + if inner.vectors.is_empty() { + return Ok(vec![]); + } - Ok(neighbors + // ef_search < k causes hnsw_rs to return fewer than k candidates; clamp. + let effective_ef = ef_search.max(k); + + // Use HNSW search with custom ef parameter (knbn) + let neighbors = inner.hnsw.search(query, k, effective_ef); + + let mut results: Vec = neighbors .into_iter() .filter_map(|neighbor| { inner.idx_to_id.get(&neighbor.d_id).map(|id| SearchResult { @@ -256,7 +291,16 @@ impl HnswIndex { metadata: None, }) }) - .collect()) + .collect(); + + // hnsw_rs does not guarantee sort order — ensure ascending distance. + results.sort_unstable_by(|a, b| { + a.score + .partial_cmp(&b.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + Ok(results) } } diff --git a/crates/ruvector-core/src/integration.rs b/crates/ruvector-core/src/integration.rs new file mode 100644 index 00000000..a807d600 --- /dev/null +++ b/crates/ruvector-core/src/integration.rs @@ -0,0 +1,413 @@ +//! Cross-integration helpers for ruvnet crate ecosystem. +//! +//! This module provides ergonomic adapters that make it straightforward to use +//! `ruvector-core` as a dependency from other ruvnet crates: +//! +//! - **ruv-FANN**: neural-network weights can be stored and retrieved via +//! 
[`FannAdapter`] using cosine similarity search across layer embeddings. +//! - **sparc / semantic file search**: [`SemanticSearchAdapter`] wraps +//! [`VectorDB`] with file-path metadata so sparc can locate relevant source +//! files by embedding query strings. +//! +//! Both adapters are thin, zero-overhead wrappers — they own no additional +//! memory beyond what the underlying [`VectorDB`] already holds. + +use crate::error::{Result, RuvectorError}; +use crate::types::{DbOptions, DistanceMetric, HnswConfig, SearchQuery, SearchResult, VectorEntry}; +use crate::vector_db::VectorDB; +use std::collections::HashMap; + +// ── ruv-FANN integration ──────────────────────────────────────────────────── + +/// Adapter that lets ruv-FANN store and retrieve layer-weight embeddings. +/// +/// Each neural-network layer can be fingerprinted as a flat `f32` embedding +/// (e.g. the flattened weight matrix or its PCA projection). Storing these +/// fingerprints in RuVector enables fast recall of "similar layers" across +/// model checkpoints. +/// +/// # Example +/// ```no_run +/// use ruvector_core::integration::FannAdapter; +/// +/// let mut adapter = FannAdapter::new(128, "./fann_index.db").unwrap(); +/// adapter.store_layer("model_v1/layer_0", &[0.1f32; 128], None).unwrap(); +/// let similar = adapter.find_similar_layers(&[0.1f32; 128], 5).unwrap(); +/// ``` +pub struct FannAdapter { + db: VectorDB, +} + +impl FannAdapter { + /// Create a new adapter backed by a RuVector database. + /// + /// `dimensions` must match the size of the layer embeddings you intend + /// to store. Cosine distance is used because weight embeddings are + /// typically meaningful up to scale. 
+ pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> { + let options = DbOptions { + dimensions, + distance_metric: DistanceMetric::Cosine, + storage_path: storage_path.into(), + hnsw_config: Some(HnswConfig { + m: 16, + ef_construction: 100, + ef_search: 100, + max_elements: 100_000, + }), + quantization: None, + }; + Ok(Self { + db: VectorDB::new(options)?, + }) + } + + /// Store a layer embedding identified by `layer_id`. + /// + /// `metadata` can carry arbitrary JSON-serialisable key-value pairs + /// (e.g. model name, checkpoint step, layer type). + pub fn store_layer( + &self, + layer_id: impl Into<String>, + embedding: &[f32], + metadata: Option<HashMap<String, serde_json::Value>>, + ) -> Result<String> { + let id = layer_id.into(); + self.db.insert(VectorEntry { + id: Some(id), + vector: embedding.to_vec(), + metadata, + }) + } + + /// Find the `k` most similar layer embeddings to `query`. + /// + /// Returns results sorted by ascending cosine distance. + pub fn find_similar_layers(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> { + self.db.search(SearchQuery { + vector: query.to_vec(), + k, + filter: None, + ef_search: None, + }) + } + + /// Find similar layers with a filter on metadata fields. + /// + /// Only results where every `(key, value)` in `filter` matches are returned. + pub fn find_similar_layers_filtered( + &self, + query: &[f32], + k: usize, + filter: HashMap<String, serde_json::Value>, + ) -> Result<Vec<SearchResult>> { + self.db.search(SearchQuery { + vector: query.to_vec(), + k, + filter: Some(filter), + ef_search: None, + }) + } + + /// Delete a layer embedding by ID. + pub fn delete_layer(&self, layer_id: &str) -> Result<bool> { + self.db.delete(layer_id) + } + + /// Total number of stored layer embeddings. + pub fn len(&self) -> Result<usize> { + self.db.len() + } + + /// Returns `true` if no embeddings have been stored yet. + pub fn is_empty(&self) -> Result<bool> { + self.db.is_empty() + } +} + +// ── sparc / semantic file search integration ──────────────────────────────── + +/// A file-path entry as indexed by [`SemanticSearchAdapter`].
+#[derive(Debug, Clone)] +pub struct FileEntry { + /// Absolute or relative path to the source file. + pub path: String, + /// Brief human-readable description of the file's contents. + pub description: String, + /// The embedding dimension used to index this file. + pub dimensions: usize, +} + +/// Adapter for sparc-style semantic file search. +/// +/// sparc needs to locate relevant source files given a natural-language query +/// string. This adapter stores one embedding per file (derived externally, +/// e.g. from an ONNX all-MiniLM model) and retrieves the closest matches +/// using HNSW approximate nearest-neighbour search. +/// +/// # Example +/// ```no_run +/// use ruvector_core::integration::SemanticSearchAdapter; +/// +/// let mut adapter = SemanticSearchAdapter::new(384, "./sparc_index.db").unwrap(); +/// +/// // Index source files (embeddings produced by your embedding pipeline) +/// adapter.index_file("src/auth/service.rs", "authentication service", &[0.0f32; 384]).unwrap(); +/// adapter.index_file("src/user/model.rs", "user data model", &[0.1f32; 384]).unwrap(); +/// +/// // Query with a natural-language description +/// let results = adapter.search("jwt token validation", &[0.05f32; 384], 5).unwrap(); +/// for r in results { +/// println!(" {} (score={:.4})", r.id, r.score); +/// } +/// ``` +pub struct SemanticSearchAdapter { + db: VectorDB, + dimensions: usize, +} + +impl SemanticSearchAdapter { + /// Create a new adapter. + /// + /// `dimensions` is the embedding dimension of your model (e.g. 384 for + /// all-MiniLM-L6-v2, 768 for BERT-base). 
+ pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> { + let options = DbOptions { + dimensions, + distance_metric: DistanceMetric::Cosine, + storage_path: storage_path.into(), + hnsw_config: Some(HnswConfig { + m: 16, + ef_construction: 100, + ef_search: 100, + max_elements: 500_000, + }), + quantization: None, + }; + Ok(Self { + db: VectorDB::new(options)?, + dimensions, + }) + } + + /// Index a source file. + /// + /// The file `path` is used as the vector ID so look-ups are O(1). + /// `description` is stored in metadata for debugging / display. + /// `embedding` must have the same length as the adapter's `dimensions`. + pub fn index_file( + &self, + path: impl Into<String>, + description: impl Into<String>, + embedding: &[f32], + ) -> Result<String> { + let path_str = path.into(); + if embedding.len() != self.dimensions { + return Err(RuvectorError::DimensionMismatch { + expected: self.dimensions, + actual: embedding.len(), + }); + } + + let mut metadata = HashMap::new(); + metadata.insert( + "description".to_string(), + serde_json::Value::String(description.into()), + ); + metadata.insert( + "path".to_string(), + serde_json::Value::String(path_str.clone()), + ); + + self.db.insert(VectorEntry { + id: Some(path_str), + vector: embedding.to_vec(), + metadata: Some(metadata), + }) + } + + /// Remove a previously indexed file. + pub fn remove_file(&self, path: &str) -> Result<bool> { + self.db.delete(path) + } + + /// Search for source files semantically related to `query_embedding`. + /// + /// `_query_text` is currently ignored; only `query_embedding` is searched. + /// Returns up to `k` results, most relevant first (ascending cosine distance); + /// each [`SearchResult`] has the file path as `.id` and the description in `.metadata`.
+ pub fn search( + &self, + _query_text: &str, + query_embedding: &[f32], + k: usize, + ) -> Result<Vec<SearchResult>> { + if query_embedding.len() != self.dimensions { + return Err(RuvectorError::DimensionMismatch { + expected: self.dimensions, + actual: query_embedding.len(), + }); + } + self.db.search(SearchQuery { + vector: query_embedding.to_vec(), + k, + filter: None, + ef_search: None, + }) + } + + /// Total number of indexed files. + pub fn len(&self) -> Result<usize> { + self.db.len() + } + + /// Returns `true` if no files have been indexed yet. + pub fn is_empty(&self) -> Result<bool> { + self.db.is_empty() + } + + /// List all indexed file paths. + pub fn list_files(&self) -> Result<Vec<String>> { + self.db.keys() + } +} + +// ── Shared utility ────────────────────────────────────────────────────────── + +/// Normalise a vector to unit length for cosine-distance workloads. +/// +/// Returns the original vector unchanged if its squared norm is below +/// `f32::EPSILON` (avoids division by zero; tiny non-zero vectors pass through too). +#[inline] +pub fn normalize(v: &[f32]) -> Vec<f32> { + let norm_sq: f32 = v.iter().map(|x| x * x).sum(); + if norm_sq < f32::EPSILON { + return v.to_vec(); + } + let norm = norm_sq.sqrt(); + v.iter().map(|x| x / norm).collect() +} + +/// Compute the cosine similarity in [−1, 1] between two vectors. +/// +/// Both inputs are treated as raw (un-normalised) vectors. +/// Returns `0.0` if the product of the two norms is not above `f32::EPSILON`.
+#[inline] +pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + debug_assert_eq!(a.len(), b.len(), "cosine_similarity: length mismatch"); + let (mut dot, mut norm_a, mut norm_b) = (0.0f32, 0.0f32, 0.0f32); + for (&ai, &bi) in a.iter().zip(b.iter()) { + dot += ai * bi; + norm_a += ai * ai; + norm_b += bi * bi; + } + let denom = norm_a.sqrt() * norm_b.sqrt(); + if denom > f32::EPSILON { + dot / denom + } else { + 0.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn test_normalize_unit_vector() { + let v = vec![3.0f32, 4.0]; + let n = normalize(&v); + let norm: f32 = n.iter().map(|x| x * x).sum::<f32>().sqrt(); + assert!( + (norm - 1.0).abs() < 1e-6, + "Expected unit norm, got {}", + norm + ); + } + + #[test] + fn test_normalize_zero_vector() { + let v = vec![0.0f32, 0.0, 0.0]; + let n = normalize(&v); + assert_eq!(n, v, "Zero vector should be returned unchanged"); + } + + #[test] + fn test_cosine_similarity_identical() { + let v = vec![1.0f32, 2.0, 3.0]; + let sim = cosine_similarity(&v, &v); + assert!( + (sim - 1.0).abs() < 1e-5, + "Identical vectors: expected 1.0, got {}", + sim + ); + } + + #[test] + fn test_cosine_similarity_orthogonal() { + let a = vec![1.0f32, 0.0]; + let b = vec![0.0f32, 1.0]; + let sim = cosine_similarity(&a, &b); + assert!( + sim.abs() < 1e-5, + "Orthogonal vectors: expected 0.0, got {}", + sim + ); + } + + #[test] + fn test_semantic_search_adapter_roundtrip() { + let dir = tempdir().unwrap(); + let path = dir.path().join("sparc.db").to_string_lossy().to_string(); + let adapter = SemanticSearchAdapter::new(4, path).unwrap(); + + let emb_a = normalize(&[1.0, 0.0, 0.0, 0.0]); + let emb_b = normalize(&[0.0, 1.0, 0.0, 0.0]); + let emb_c = normalize(&[0.0, 0.0, 1.0, 0.0]); + + // hnsw_rs requires at least 2 elements before searching.
+ adapter + .index_file("src/auth.rs", "authentication", &emb_a) + .unwrap(); + adapter + .index_file("src/user.rs", "user model", &emb_b) + .unwrap(); + adapter + .index_file("src/storage.rs", "storage layer", &emb_c) + .unwrap(); + + assert_eq!(adapter.len().unwrap(), 3); + + // Query close to emb_a — should return src/auth.rs first + let results = adapter.search("auth", &emb_a, 2).unwrap(); + assert!(!results.is_empty()); + assert_eq!(results[0].id, "src/auth.rs"); + } + + #[test] + fn test_fann_adapter_store_and_retrieve() { + let dir = tempdir().unwrap(); + let path = dir.path().join("fann.db").to_string_lossy().to_string(); + let adapter = FannAdapter::new(4, path).unwrap(); + + let layer_emb_0 = normalize(&[1.0, 1.0, 0.0, 0.0]); + let layer_emb_1 = normalize(&[0.0, 0.0, 1.0, 1.0]); + let layer_emb_2 = normalize(&[1.0, 0.0, 1.0, 0.0]); + + // hnsw_rs requires at least 2 elements before searching. + adapter + .store_layer("model_v1/layer_0", &layer_emb_0, None) + .unwrap(); + adapter + .store_layer("model_v1/layer_1", &layer_emb_1, None) + .unwrap(); + adapter + .store_layer("model_v1/layer_2", &layer_emb_2, None) + .unwrap(); + + let results = adapter.find_similar_layers(&layer_emb_0, 1).unwrap(); + assert!(!results.is_empty()); + assert_eq!(results[0].id, "model_v1/layer_0"); + } +} diff --git a/crates/ruvector-core/src/lib.rs b/crates/ruvector-core/src/lib.rs index 8158c2ca..f46c7294 100644 --- a/crates/ruvector-core/src/lib.rs +++ b/crates/ruvector-core/src/lib.rs @@ -73,6 +73,12 @@ pub mod memory; /// Advanced techniques: hypergraphs, learned indexes, neural hashing, TDA (Phase 6) pub mod advanced; +/// Cross-integration helpers for the ruvnet crate ecosystem. +/// +/// Provides [`integration::FannAdapter`] for ruv-FANN layer-embedding storage +/// and [`integration::SemanticSearchAdapter`] for sparc semantic file search. 
+pub mod integration; + // Re-exports pub use advanced_features::{ fuse_rankings, ConformalConfig, ConformalPredictor, EnhancedPQ, FilterExpression, diff --git a/crates/ruvector-core/src/types.rs b/crates/ruvector-core/src/types.rs index c39a49c2..cc3a6fb1 100644 --- a/crates/ruvector-core/src/types.rs +++ b/crates/ruvector-core/src/types.rs @@ -87,10 +87,13 @@ pub struct HnswConfig { impl Default for HnswConfig { fn default() -> Self { Self { - m: 32, - ef_construction: 200, + m: 16, + ef_construction: 100, ef_search: 100, - max_elements: 10_000_000, + // 1M is a reasonable default that avoids excessive upfront memory + // allocation while still being suitable for production workloads. + // Callers building large indexes should set this explicitly. + max_elements: 1_000_000, } } } diff --git a/crates/ruvector-core/tests/hnsw_integration_test.rs b/crates/ruvector-core/tests/hnsw_integration_test.rs index 4fda0dd2..3755e21b 100644 --- a/crates/ruvector-core/tests/hnsw_integration_test.rs +++ b/crates/ruvector-core/tests/hnsw_integration_test.rs @@ -493,3 +493,116 @@ fn test_hnsw_parallel_batch_insert() -> Result<()> { Ok(()) } + +// ── New tests covering correctness fixes ──────────────────────────────────── + +/// Verify that `search` with k=0 returns an empty vec without panicking. +#[test] +fn test_hnsw_search_k_zero() -> Result<()> { + let config = HnswConfig { + m: 16, + ef_construction: 100, + ef_search: 50, + max_elements: 1000, + }; + + let mut index = HnswIndex::new(32, DistanceMetric::Euclidean, config)?; + index.add("v0".to_string(), vec![0.0f32; 32])?; + + let results = index.search(&vec![0.0f32; 32], 0)?; + assert!(results.is_empty(), "k=0 must return empty results"); + + Ok(()) +} + +/// Verify that search results are sorted ascending by distance. 
+#[test] +fn test_hnsw_results_sorted_ascending() -> Result<()> { + let dimensions = 64; + let num_vectors = 200; + let k = 20; + + let config = HnswConfig { + m: 16, + ef_construction: 100, + ef_search: 100, + max_elements: 1000, + }; + + let mut index = HnswIndex::new(dimensions, DistanceMetric::Euclidean, config)?; + + let vectors = generate_random_vectors(num_vectors, dimensions, 31415); + for (i, v) in vectors.iter().enumerate() { + index.add(format!("v{}", i), v.clone())?; + } + + let query = &vectors[0]; + let results = index.search(query, k)?; + + assert!(!results.is_empty()); + for window in results.windows(2) { + assert!( + window[0].score <= window[1].score, + "Results not sorted: score[n]={} > score[n+1]={}", + window[0].score, + window[1].score + ); + } + + Ok(()) +} + +/// Verify that `set_ef_search` actually changes the effective ef used for search. +#[test] +fn test_hnsw_set_ef_search_updates_config() -> Result<()> { + let dimensions = 32; + let config = HnswConfig { + m: 16, + ef_construction: 100, + ef_search: 50, + max_elements: 1000, + }; + + let mut index = HnswIndex::new(dimensions, DistanceMetric::Cosine, config)?; + assert_eq!(index.config().ef_search, 50); + + index.set_ef_search(200); + assert_eq!( + index.config().ef_search, + 200, + "set_ef_search should update config.ef_search" + ); + + Ok(()) +} + +/// Verify that `ef_search < k` is clamped to k rather than silently under-recalling. 
+#[test] +fn test_hnsw_search_with_ef_clamps_to_k() -> Result<()> { + let dimensions = 32; + let num_vectors = 100; + let k = 20; + + let config = HnswConfig { + m: 16, + ef_construction: 100, + ef_search: 5, // intentionally lower than k + max_elements: 1000, + }; + + let mut index = HnswIndex::new(dimensions, DistanceMetric::Euclidean, config)?; + let vectors = generate_random_vectors(num_vectors, dimensions, 27182); + for (i, v) in vectors.iter().enumerate() { + index.add(format!("v{}", i), v.clone())?; + } + + // search() uses ef_search=5 internally, which is < k=20; results should + // still be at least as many as the index can return (not zero). + let results = index.search(&vectors[0], k)?; + assert!( + !results.is_empty(), + "search with ef_search < k must still return results" + ); + + Ok(()) +}