fix(core): HNSW correctness fixes, k=0 guard, sorted results, cross-integration helpers (v2.2.3) (#502)

* fix(core): correctness + safety fixes in HNSW/flat index + cross-integration helpers (v2.2.3)

Correctness fixes:
- hnsw: `DistanceFn::eval` now clamps negative distances up to 0.0 — prevents
  an hnsw_rs internal BinaryHeap assertion panic when floating-point rounding
  yields a marginally-negative cosine/euclidean distance for near-identical vectors
- hnsw: `set_ef_search` was a silent no-op; now correctly writes to
  `config.ef_search` so callers can tune recall at query time
- hnsw: `search_with_ef` clamps `ef_search` to `max(ef_search, k)` to
  prevent silent under-recall when ef_search < k (hnsw_rs constraint)
- hnsw: `search_with_ef` now explicitly returns an empty slice for k=0
  instead of forwarding to hnsw_rs which may panic
- hnsw: `search_with_ef` returns early (empty slice) when index is empty
  to avoid hnsw_rs BinaryHeap `.peek().unwrap()` panic on zero-element index
- hnsw: results are now explicitly sorted by ascending distance; hnsw_rs
  does not guarantee this order in all code paths
- hnsw: deserialization rebuilds the HNSW graph in index order
  (sorted by idx) and uses an O(n) HashMap lookup instead of O(n^2)
  linear search over the vectors vec during restore
- flat: added k=0 guard (returns empty slice, no panic)
- flat: switched sort to `sort_unstable_by` with a `partial_cmp` fallback
  to handle NaN distances gracefully and improve throughput on large sets

API improvement:
- types: `HnswConfig::default()` now uses `max_elements=1_000_000` (was
  10_000_000) and `m=16/ef_construction=100` to avoid excessive upfront
  memory allocation in the common case; large-index callers can still
  set `max_elements` explicitly

New module:
- integration: `FannAdapter` and `SemanticSearchAdapter` — thin wrappers
  that make ruvector-core directly usable from ruv-FANN (layer-embedding
  storage + retrieval) and sparc (semantic file search by embedding query).
  Includes `normalize()` and `cosine_similarity()` free-standing utilities.

Tests (4 new integration, 3 new unit):
- test_hnsw_search_k_zero: k=0 returns empty, no panic
- test_hnsw_results_sorted_ascending: verifies window[i].score <= window[i+1].score
- test_hnsw_set_ef_search_updates_config: set_ef_search writes through to config
- test_hnsw_search_with_ef_clamps_to_k: ef < k still returns results
- flat: test_flat_index_k_zero, test_flat_index_results_sorted
- integration: FannAdapter and SemanticSearchAdapter roundtrip tests

Version bump: 2.2.2 → 2.2.3

Co-Authored-By: claude-flow <ruv@ruv.net>

* style: cargo fmt ruvector-core
This commit is contained in:
rUv 2026-05-23 03:37:35 -04:00 committed by GitHub
parent 5126ba418f
commit e2350b759f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 771 additions and 151 deletions

254
Cargo.lock generated
View file

@ -883,7 +883,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -892,7 +892,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -1306,7 +1306,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -1341,7 +1341,7 @@ dependencies = [
"criterion 0.5.1",
"libm",
"proptest",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -2418,7 +2418,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -2886,7 +2886,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -3861,7 +3861,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -4472,7 +4472,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -4969,7 +4969,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -5053,12 +5053,12 @@ dependencies = [
"ruvector-consciousness",
"ruvector-delta-core",
"ruvector-domain-expansion",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
"ruvector-nervous-system",
"ruvector-solver",
"ruvector-sona 0.2.0",
"ruvector-sparsifier",
"ruvllm 2.2.2",
"ruvllm 2.2.3",
"rvf-crypto",
"rvf-federation",
"rvf-runtime",
@ -5410,7 +5410,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -6397,7 +6397,7 @@ dependencies = [
"ruqu-algorithms",
"ruvector-attention",
"ruvector-cluster",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-delta-core",
"ruvector-filter",
"ruvector-gnn",
@ -6451,7 +6451,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -7060,11 +7060,11 @@ dependencies = [
"rkyv",
"roaring",
"ruvector-attention",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-gnn",
"ruvector-graph",
"ruvector-hyperbolic-hnsw",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
"ruvector-nervous-system",
"ruvector-raft",
"ruvector-sona 0.2.0",
@ -7989,7 +7989,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -8076,7 +8076,7 @@ dependencies = [
"ndarray 0.16.1",
"rand 0.8.5",
"rand_distr 0.4.3",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"thiserror 2.0.18",
@ -8320,7 +8320,7 @@ dependencies = [
[[package]]
name = "ruqu"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"blake3",
"cognitum-gate-tilezero 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -8586,7 +8586,7 @@ dependencies = [
[[package]]
name = "ruvector-acorn"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"criterion 0.5.1",
"rand 0.8.5",
@ -8609,7 +8609,7 @@ dependencies = [
[[package]]
name = "ruvector-attention"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"approx",
"criterion 0.5.1",
@ -8624,7 +8624,7 @@ dependencies = [
[[package]]
name = "ruvector-attention-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"napi",
"napi-build",
@ -8656,7 +8656,7 @@ dependencies = [
[[package]]
name = "ruvector-attention-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"console_error_panic_hook",
"getrandom 0.2.17",
@ -8671,7 +8671,7 @@ dependencies = [
[[package]]
name = "ruvector-attn-mincut"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"serde",
"serde_json",
@ -8680,7 +8680,7 @@ dependencies = [
[[package]]
name = "ruvector-bench"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"byteorder",
@ -8701,8 +8701,8 @@ dependencies = [
"rayon",
"ruvector-cognitive-container",
"ruvector-coherence",
"ruvector-core 2.2.2",
"ruvector-mincut 2.2.2",
"ruvector-core 2.2.3",
"ruvector-mincut 2.2.3",
"serde",
"serde_json",
"statistical",
@ -8731,7 +8731,7 @@ dependencies = [
"rand_distr 0.4.3",
"rayon",
"reqwest 0.12.28",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"rvf-crypto",
"rvf-types",
"rvf-wire",
@ -8748,7 +8748,7 @@ dependencies = [
[[package]]
name = "ruvector-cli"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"assert_cmd",
@ -8773,7 +8773,7 @@ dependencies = [
"predicates",
"prettytable-rs",
"rand 0.8.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-gnn",
"ruvector-graph",
"serde",
@ -8806,7 +8806,7 @@ dependencies = [
"rand_distr 0.4.3",
"rayon",
"ruvector-attention",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-gnn",
"ruvector-graph",
"serde",
@ -8822,7 +8822,7 @@ dependencies = [
[[package]]
name = "ruvector-cluster"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"async-trait",
"bincode 2.0.1",
@ -8831,7 +8831,7 @@ dependencies = [
"futures",
"parking_lot 0.12.5",
"rand 0.8.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"thiserror 2.0.18",
@ -8842,7 +8842,7 @@ dependencies = [
[[package]]
name = "ruvector-cnn"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"criterion 0.5.1",
"fastrand",
@ -8870,7 +8870,7 @@ dependencies = [
[[package]]
name = "ruvector-cognitive-container"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"proptest",
"serde",
@ -8880,7 +8880,7 @@ dependencies = [
[[package]]
name = "ruvector-coherence"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"serde",
"serde_json",
@ -8888,14 +8888,14 @@ dependencies = [
[[package]]
name = "ruvector-collections"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"bincode 2.0.1",
"chrono",
"criterion 0.5.1",
"dashmap 6.1.0",
"parking_lot 0.12.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"thiserror 2.0.18",
@ -8904,7 +8904,7 @@ dependencies = [
[[package]]
name = "ruvector-consciousness"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"approx",
"criterion 0.5.1",
@ -8916,7 +8916,7 @@ dependencies = [
"ruvector-cognitive-container",
"ruvector-coherence",
"ruvector-math",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
"ruvector-solver",
"ruvector-sparsifier",
"serde",
@ -8926,7 +8926,7 @@ dependencies = [
[[package]]
name = "ruvector-consciousness-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"getrandom 0.2.17",
"js-sys",
@ -8992,7 +8992,7 @@ dependencies = [
[[package]]
name = "ruvector-core"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"bincode 2.0.1",
@ -9033,7 +9033,7 @@ dependencies = [
"approx",
"ruvector-attention",
"ruvector-gnn",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
"serde",
"serde_json",
"thiserror 1.0.69",
@ -9041,7 +9041,7 @@ dependencies = [
[[package]]
name = "ruvector-dag"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"criterion 0.5.1",
"crossbeam",
@ -9053,7 +9053,7 @@ dependencies = [
"pqcrypto-kyber",
"proptest",
"rand 0.8.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"sha2 0.10.9",
@ -9078,7 +9078,7 @@ dependencies = [
[[package]]
name = "ruvector-decompiler"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"criterion 0.5.1",
"memchr",
@ -9087,7 +9087,7 @@ dependencies = [
"ort",
"rayon",
"regex",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
"serde",
"serde_json",
"sha3",
@ -9096,7 +9096,7 @@ dependencies = [
[[package]]
name = "ruvector-decompiler-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"console_error_panic_hook",
"getrandom 0.2.17",
@ -9200,7 +9200,7 @@ dependencies = [
[[package]]
name = "ruvector-diskann"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"bincode 2.0.1",
"bytemuck",
@ -9217,7 +9217,7 @@ dependencies = [
[[package]]
name = "ruvector-diskann-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"napi",
"napi-build",
@ -9238,7 +9238,7 @@ dependencies = [
[[package]]
name = "ruvector-domain-expansion"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"criterion 0.5.1",
"proptest",
@ -9281,7 +9281,7 @@ dependencies = [
[[package]]
name = "ruvector-exotic-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"console_error_panic_hook",
"getrandom 0.2.17",
@ -9297,12 +9297,12 @@ dependencies = [
[[package]]
name = "ruvector-filter"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"chrono",
"dashmap 6.1.0",
"ordered-float",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"thiserror 2.0.18",
@ -9348,7 +9348,7 @@ dependencies = [
[[package]]
name = "ruvector-gnn"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"criterion 0.5.1",
@ -9364,7 +9364,7 @@ dependencies = [
"rand 0.8.5",
"rand_distr 0.4.3",
"rayon",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"tempfile",
@ -9373,7 +9373,7 @@ dependencies = [
[[package]]
name = "ruvector-gnn-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"napi",
"napi-build",
@ -9384,7 +9384,7 @@ dependencies = [
[[package]]
name = "ruvector-gnn-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"console_error_panic_hook",
"getrandom 0.2.17",
@ -9399,7 +9399,7 @@ dependencies = [
[[package]]
name = "ruvector-graph"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"bincode 2.0.1",
@ -9439,7 +9439,7 @@ dependencies = [
"rkyv",
"roaring",
"ruvector-cluster",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-raft",
"ruvector-replication",
"serde",
@ -9460,14 +9460,14 @@ dependencies = [
[[package]]
name = "ruvector-graph-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"futures",
"napi",
"napi-build",
"napi-derive",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-graph",
"serde",
"serde_json",
@ -9479,14 +9479,14 @@ dependencies = [
[[package]]
name = "ruvector-graph-transformer"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"proptest",
"rand 0.8.5",
"ruvector-attention",
"ruvector-coherence",
"ruvector-gnn",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
"ruvector-solver",
"ruvector-verified",
"serde",
@ -9495,7 +9495,7 @@ dependencies = [
[[package]]
name = "ruvector-graph-transformer-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"napi",
"napi-build",
@ -9507,7 +9507,7 @@ dependencies = [
[[package]]
name = "ruvector-graph-transformer-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"js-sys",
"serde",
@ -9519,7 +9519,7 @@ dependencies = [
[[package]]
name = "ruvector-graph-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"console_error_panic_hook",
@ -9528,7 +9528,7 @@ dependencies = [
"js-sys",
"parking_lot 0.12.5",
"regex",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-graph",
"serde",
"serde-wasm-bindgen",
@ -9553,7 +9553,7 @@ dependencies = [
"criterion 0.5.1",
"hailort-sys",
"proptest",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde_json",
"sha2 0.10.9",
"thiserror 2.0.18",
@ -9573,10 +9573,10 @@ dependencies = [
"prost",
"protoc-bin-vendored",
"rcgen",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-hailo",
"ruvector-mmwave",
"ruvllm 2.2.2",
"ruvllm 2.2.3",
"serde",
"serde_json",
"sha2 0.10.9",
@ -9641,7 +9641,7 @@ dependencies = [
[[package]]
name = "ruvector-math"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"approx",
"criterion 0.5.1",
@ -9656,7 +9656,7 @@ dependencies = [
[[package]]
name = "ruvector-math-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"console_error_panic_hook",
"getrandom 0.2.17",
@ -9674,7 +9674,7 @@ dependencies = [
[[package]]
name = "ruvector-metrics"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"chrono",
"lazy_static",
@ -9729,7 +9729,7 @@ dependencies = [
[[package]]
name = "ruvector-mincut"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"criterion 0.5.1",
@ -9743,7 +9743,7 @@ dependencies = [
"rand 0.8.5",
"rayon",
"roaring",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-graph",
"serde",
"serde_json",
@ -9788,24 +9788,24 @@ dependencies = [
[[package]]
name = "ruvector-mincut-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"napi",
"napi-build",
"napi-derive",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
"serde",
"serde_json",
]
[[package]]
name = "ruvector-mincut-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"console_error_panic_hook",
"getrandom 0.2.17",
"js-sys",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
"serde",
"serde-wasm-bindgen",
"serde_json",
@ -9819,7 +9819,7 @@ version = "0.0.1"
[[package]]
name = "ruvector-nervous-system"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"approx",
@ -9853,14 +9853,14 @@ dependencies = [
[[package]]
name = "ruvector-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"napi",
"napi-build",
"napi-derive",
"ruvector-collections",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-filter",
"ruvector-metrics",
"serde",
@ -9872,7 +9872,7 @@ dependencies = [
[[package]]
name = "ruvector-profiler"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"serde",
"serde_json",
@ -9881,7 +9881,7 @@ dependencies = [
[[package]]
name = "ruvector-rabitq"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"criterion 0.5.1",
"rand 0.8.5",
@ -9908,7 +9908,7 @@ dependencies = [
[[package]]
name = "ruvector-raft"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"bincode 2.0.1",
"chrono",
@ -9916,7 +9916,7 @@ dependencies = [
"futures",
"parking_lot 0.12.5",
"rand 0.8.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"thiserror 2.0.18",
@ -9936,7 +9936,7 @@ dependencies = [
[[package]]
name = "ruvector-replication"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"bincode 2.0.1",
"chrono",
@ -9944,7 +9944,7 @@ dependencies = [
"futures",
"parking_lot 0.12.5",
"rand 0.8.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"thiserror 2.0.18",
@ -9979,7 +9979,7 @@ dependencies = [
[[package]]
name = "ruvector-router-cli"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"chrono",
@ -9994,7 +9994,7 @@ dependencies = [
[[package]]
name = "ruvector-router-core"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"bincode 2.0.1",
@ -10021,7 +10021,7 @@ dependencies = [
[[package]]
name = "ruvector-router-ffi"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"chrono",
@ -10036,7 +10036,7 @@ dependencies = [
[[package]]
name = "ruvector-router-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"js-sys",
"ruvector-router-core",
@ -10050,7 +10050,7 @@ dependencies = [
[[package]]
name = "ruvector-rulake"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"hex",
"rand 0.8.5",
@ -10065,7 +10065,7 @@ dependencies = [
[[package]]
name = "ruvector-scipix"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"ab_glyph",
"anyhow",
@ -10138,12 +10138,12 @@ dependencies = [
[[package]]
name = "ruvector-server"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"axum 0.7.9",
"dashmap 6.1.0",
"parking_lot 0.12.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"thiserror 2.0.18",
@ -10156,13 +10156,13 @@ dependencies = [
[[package]]
name = "ruvector-snapshot"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"async-trait",
"bincode 2.0.1",
"chrono",
"flate2",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"sha2 0.10.9",
@ -10173,7 +10173,7 @@ dependencies = [
[[package]]
name = "ruvector-solver"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"approx",
"criterion 0.5.1",
@ -10192,7 +10192,7 @@ dependencies = [
[[package]]
name = "ruvector-solver-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"napi",
"napi-build",
@ -10205,7 +10205,7 @@ dependencies = [
[[package]]
name = "ruvector-solver-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"getrandom 0.2.17",
"js-sys",
@ -10255,7 +10255,7 @@ dependencies = [
[[package]]
name = "ruvector-sparse-inference"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"byteorder",
@ -10278,7 +10278,7 @@ dependencies = [
[[package]]
name = "ruvector-sparsifier"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"approx",
"criterion 0.5.1",
@ -10296,7 +10296,7 @@ dependencies = [
[[package]]
name = "ruvector-sparsifier-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"console_error_panic_hook",
"getrandom 0.2.17",
@ -10311,11 +10311,11 @@ dependencies = [
[[package]]
name = "ruvector-temporal-tensor"
version = "2.2.2"
version = "2.2.3"
[[package]]
name = "ruvector-tiny-dancer-core"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"bytemuck",
@ -10345,7 +10345,7 @@ dependencies = [
[[package]]
name = "ruvector-tiny-dancer-node"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"chrono",
@ -10362,7 +10362,7 @@ dependencies = [
[[package]]
name = "ruvector-tiny-dancer-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"js-sys",
"ruvector-tiny-dancer-core",
@ -10383,7 +10383,7 @@ dependencies = [
"proptest",
"ruvector-cognitive-container",
"ruvector-coherence",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"serde",
"serde_json",
"thiserror 2.0.18",
@ -10405,7 +10405,7 @@ dependencies = [
[[package]]
name = "ruvector-wasm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"base64 0.22.1",
@ -10418,7 +10418,7 @@ dependencies = [
"parking_lot 0.12.5",
"rand 0.8.5",
"ruvector-collections",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-filter",
"serde",
"serde-wasm-bindgen",
@ -10650,7 +10650,7 @@ dependencies = [
[[package]]
name = "ruvllm"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"async-trait",
@ -10680,7 +10680,7 @@ dependencies = [
"rayon",
"regex",
"ruvector-attention",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-gnn",
"ruvector-graph",
"ruvector-sona 0.2.0",
@ -10700,7 +10700,7 @@ dependencies = [
[[package]]
name = "ruvllm-cli"
version = "2.2.2"
version = "2.2.3"
dependencies = [
"anyhow",
"assert_cmd",
@ -10720,7 +10720,7 @@ dependencies = [
"predicates",
"prettytable-rs",
"rustyline",
"ruvllm 2.2.2",
"ruvllm 2.2.3",
"serde",
"serde_json",
"tempfile",
@ -11053,7 +11053,7 @@ dependencies = [
"rand_distr 0.4.3",
"ruvector-attention",
"ruvector-collections",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-dag",
"ruvector-filter",
"ruvector-gnn",
@ -11167,7 +11167,7 @@ dependencies = [
"js-sys",
"once_cell",
"parking_lot 0.12.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"rvf-runtime",
"rvf-types",
"serde",
@ -11258,7 +11258,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -11267,7 +11267,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -11406,7 +11406,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -11415,7 +11415,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -12033,7 +12033,7 @@ name = "subpolynomial-time-mincut-demo"
version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -12256,7 +12256,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -12949,7 +12949,7 @@ name = "train-discoveries"
version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-core 2.2.2",
"ruvector-core 2.2.3",
"ruvector-solver",
"serde",
"serde_json",
@ -13369,7 +13369,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]
@ -13635,7 +13635,7 @@ version = "0.1.0"
dependencies = [
"rand 0.8.5",
"ruvector-coherence",
"ruvector-mincut 2.2.2",
"ruvector-mincut 2.2.3",
]
[[package]]

View file

@ -237,7 +237,7 @@ members = [
resolver = "2"
[workspace.package]
version = "2.2.2"
version = "2.2.3"
edition = "2021"
rust-version = "1.77"
license = "MIT"

View file

@ -34,6 +34,10 @@ impl VectorIndex for FlatIndex {
}
fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
if k == 0 {
return Ok(vec![]);
}
// Distance calculation - parallel on native, sequential on WASM
#[cfg(all(feature = "parallel", not(target_arch = "wasm32")))]
let mut results: Vec<_> = self
@ -60,8 +64,9 @@ impl VectorIndex for FlatIndex {
})
.collect::<Result<Vec<_>>>()?;
// Sort by distance and take top k
results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
// Sort by distance (ascending — closest first) and take top k.
// Use sort_unstable_by for better performance on large result sets.
results.sort_unstable_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
results.truncate(k);
Ok(results
@ -105,4 +110,40 @@ mod tests {
Ok(())
}
#[test]
fn test_flat_index_k_zero() -> Result<()> {
let mut index = FlatIndex::new(3, DistanceMetric::Euclidean);
index.add("v1".to_string(), vec![1.0, 0.0, 0.0])?;
let results = index.search(&[1.0, 0.0, 0.0], 0)?;
assert!(results.is_empty(), "k=0 must return empty results");
Ok(())
}
#[test]
fn test_flat_index_results_sorted() -> Result<()> {
let mut index = FlatIndex::new(3, DistanceMetric::Euclidean);
// Insert vectors at various distances from origin
for i in 1usize..=10 {
index.add(format!("v{}", i), vec![i as f32, 0.0, 0.0])?;
}
let query = vec![0.0, 0.0, 0.0];
let results = index.search(&query, 5)?;
assert_eq!(results.len(), 5);
for window in results.windows(2) {
assert!(
window[0].score <= window[1].score,
"Results must be sorted ascending by distance"
);
}
// Closest is v1 (distance=1)
assert_eq!(results[0].id, "v1");
Ok(())
}
}

View file

@ -23,7 +23,11 @@ impl DistanceFn {
impl Distance<f32> for DistanceFn {
fn eval(&self, a: &[f32], b: &[f32]) -> f32 {
distance(a, b, self.metric).unwrap_or(f32::MAX)
// hnsw_rs asserts `dist_to_ref >= 0` in its search loop. Clamp any
// tiny negative values caused by floating-point rounding (e.g. cosine
// distance between two nearly-identical normalised vectors can be
// marginally below zero). f32::MAX is the safe sentinel for errors.
distance(a, b, self.metric).unwrap_or(f32::MAX).max(0.0)
}
}
@ -126,10 +130,12 @@ impl HnswIndex {
&self.config
}
/// Set efSearch parameter for query-time accuracy tuning
pub fn set_ef_search(&mut self, _ef_search: usize) {
// Note: hnsw_rs controls ef_search via the search method's knbn parameter
// We store it in config and use it in search_with_ef
/// Set efSearch parameter for query-time accuracy tuning.
///
/// Higher values increase recall at the cost of search latency.
/// Typical range: 50–500. Must be >= k for meaningful results.
pub fn set_ef_search(&mut self, ef_search: usize) {
self.config.ef_search = ef_search;
}
/// Serialize the index to bytes using bincode
@ -197,17 +203,27 @@ impl HnswIndex {
distance_fn,
);
// Rebuild the index by inserting all vectors
// Rebuild the index by inserting all vectors.
// Build a HashMap first to avoid O(n^2) linear search in the loop below.
let vectors_lookup: std::collections::HashMap<&str, &Vec<f32>> = state
.vectors
.iter()
.map(|(id, v)| (id.as_str(), v))
.collect();
let id_to_idx: DashMap<VectorId, usize> = state.id_to_idx.into_iter().collect();
let idx_to_id: DashMap<usize, VectorId> = state.idx_to_id.into_iter().collect();
// Insert vectors into HNSW in order
for entry in idx_to_id.iter() {
let idx = *entry.key();
let id = entry.value();
if let Some(vector) = state.vectors.iter().find(|(vid, _)| vid == id) {
// Use insert_data method with slice and idx
hnsw.insert_data(&vector.1, idx);
// Insert vectors into HNSW in index order for deterministic reconstruction.
let mut sorted_entries: Vec<_> = idx_to_id
.iter()
.map(|e| (*e.key(), e.value().clone()))
.collect();
sorted_entries.sort_unstable_by_key(|(idx, _)| *idx);
for (idx, id) in &sorted_entries {
if let Some(vector) = vectors_lookup.get(id.as_str()) {
hnsw.insert_data(vector, *idx);
}
}
@ -227,7 +243,11 @@ impl HnswIndex {
})
}
/// Search with custom efSearch parameter
/// Search with custom efSearch parameter.
///
/// `ef_search` must be >= `k`; values smaller than `k` are clamped to `k`
/// to avoid silent under-recall. Results are returned sorted by ascending
/// distance (closest first).
pub fn search_with_ef(
&self,
query: &[f32],
@ -241,12 +261,27 @@ impl HnswIndex {
});
}
if k == 0 {
return Ok(vec![]);
}
let inner = self.inner.read();
// Use HNSW search with custom ef parameter (knbn)
let neighbors = inner.hnsw.search(query, k, ef_search);
// hnsw_rs panics in its BinaryHeap traversal when the index is empty
// or contains only a single element (the candidate/return-point loop
// calls .peek().unwrap() without an emptiness guard). Return early
// with an empty result set instead of hitting that panic. Note that the
// guard below only covers the empty-index case.
if inner.vectors.is_empty() {
return Ok(vec![]);
}
Ok(neighbors
// ef_search < k causes hnsw_rs to return fewer than k candidates; clamp.
let effective_ef = ef_search.max(k);
// Use HNSW search with custom ef parameter (knbn)
let neighbors = inner.hnsw.search(query, k, effective_ef);
let mut results: Vec<SearchResult> = neighbors
.into_iter()
.filter_map(|neighbor| {
inner.idx_to_id.get(&neighbor.d_id).map(|id| SearchResult {
@ -256,7 +291,16 @@ impl HnswIndex {
metadata: None,
})
})
.collect())
.collect();
// hnsw_rs does not guarantee sort order — ensure ascending distance.
results.sort_unstable_by(|a, b| {
a.score
.partial_cmp(&b.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
Ok(results)
}
}

View file

@ -0,0 +1,413 @@
//! Cross-integration helpers for ruvnet crate ecosystem.
//!
//! This module provides ergonomic adapters that make it straightforward to use
//! `ruvector-core` as a dependency from other ruvnet crates:
//!
//! - **ruv-FANN**: neural-network weights can be stored and retrieved via
//! [`FannAdapter`] using cosine similarity search across layer embeddings.
//! - **sparc / semantic file search**: [`SemanticSearchAdapter`] wraps
//! [`VectorDB`] with file-path metadata so sparc can locate relevant source
//! files by embedding query strings.
//!
//! Both adapters are thin, zero-overhead wrappers — they own no additional
//! memory beyond what the underlying [`VectorDB`] already holds.
use crate::error::{Result, RuvectorError};
use crate::types::{DbOptions, DistanceMetric, HnswConfig, SearchQuery, SearchResult, VectorEntry};
use crate::vector_db::VectorDB;
use std::collections::HashMap;
// ── ruv-FANN integration ────────────────────────────────────────────────────
/// Adapter that lets ruv-FANN store and retrieve layer-weight embeddings.
///
/// Each neural-network layer can be fingerprinted as a flat `f32` embedding
/// (e.g. the flattened weight matrix or its PCA projection). Storing these
/// fingerprints in RuVector enables fast recall of "similar layers" across
/// model checkpoints.
///
/// All methods take `&self`, so the adapter does not need to be declared
/// `mut` by callers.
///
/// # Example
/// ```no_run
/// use ruvector_core::integration::FannAdapter;
///
/// let adapter = FannAdapter::new(128, "./fann_index.db").unwrap();
/// adapter.store_layer("model_v1/layer_0", &[0.1f32; 128], None).unwrap();
/// let similar = adapter.find_similar_layers(&[0.1f32; 128], 5).unwrap();
/// ```
pub struct FannAdapter {
// Underlying vector database; every adapter method delegates to it.
db: VectorDB,
}
impl FannAdapter {
    /// Build an adapter on top of a RuVector database.
    ///
    /// `dimensions` must equal the length of the layer embeddings that will
    /// be stored. The database is configured for cosine distance (weight
    /// embeddings are typically meaningful up to scale), a fixed HNSW
    /// configuration (`m = 16`, `ef_construction = 100`, `ef_search = 100`,
    /// `max_elements = 100_000`) and no quantization.
    ///
    /// # Errors
    /// Propagates any error returned by `VectorDB::new`.
    pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> {
        let hnsw = HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 100,
            max_elements: 100_000,
        };
        let db = VectorDB::new(DbOptions {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            storage_path: storage_path.into(),
            hnsw_config: Some(hnsw),
            quantization: None,
        })?;
        Ok(Self { db })
    }

    /// Insert one layer embedding under the identifier `layer_id`.
    ///
    /// `metadata` may hold arbitrary JSON-serialisable key-value pairs
    /// (e.g. model name, checkpoint step, layer type). The embedding slice
    /// is copied into an owned vector before insertion.
    ///
    /// Returns the value produced by `VectorDB::insert` (presumably the ID
    /// of the stored entry — confirm against `VectorDB`).
    pub fn store_layer(
        &self,
        layer_id: impl Into<String>,
        embedding: &[f32],
        metadata: Option<HashMap<String, serde_json::Value>>,
    ) -> Result<String> {
        let entry = VectorEntry {
            id: Some(layer_id.into()),
            vector: embedding.to_vec(),
            metadata,
        };
        self.db.insert(entry)
    }

    /// Look up the `k` stored layer embeddings closest to `query`.
    ///
    /// No metadata filter is applied and the database's default `ef_search`
    /// is used. Results are whatever `VectorDB::search` returns; they are
    /// expected to be ordered by ascending cosine distance.
    pub fn find_similar_layers(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
        self.run_search(query, k, None)
    }

    /// Same as [`FannAdapter::find_similar_layers`], restricted by metadata.
    ///
    /// `filter` is handed to `VectorDB::search` as the query's metadata
    /// filter; the intent is that only entries matching every `(key, value)`
    /// pair are returned.
    pub fn find_similar_layers_filtered(
        &self,
        query: &[f32],
        k: usize,
        filter: HashMap<String, serde_json::Value>,
    ) -> Result<Vec<SearchResult>> {
        self.run_search(query, k, Some(filter))
    }

    /// Remove the layer embedding stored under `layer_id`.
    ///
    /// Returns the boolean reported by `VectorDB::delete`.
    pub fn delete_layer(&self, layer_id: &str) -> Result<bool> {
        self.db.delete(layer_id)
    }

    /// Number of layer embeddings currently stored.
    pub fn len(&self) -> Result<usize> {
        self.db.len()
    }

    /// `true` when the adapter holds no embeddings.
    pub fn is_empty(&self) -> Result<bool> {
        self.db.is_empty()
    }

    /// Shared search path: copies `query` into an owned vector and issues a
    /// `SearchQuery` with the given optional filter and no `ef_search`
    /// override.
    fn run_search(
        &self,
        query: &[f32],
        k: usize,
        filter: Option<HashMap<String, serde_json::Value>>,
    ) -> Result<Vec<SearchResult>> {
        let request = SearchQuery {
            vector: query.to_vec(),
            k,
            filter,
            ef_search: None,
        };
        self.db.search(request)
    }
}
// ── sparc / semantic file search integration ────────────────────────────────
/// A file-path entry as indexed by [`SemanticSearchAdapter`].
///
/// Plain data record: a path, a short description, and the embedding
/// dimension the file was indexed with.
// NOTE(review): nothing in the visible part of this module constructs or
// returns `FileEntry` — confirm it is meant for callers / future use.
#[derive(Debug, Clone)]
pub struct FileEntry {
/// Absolute or relative path to the source file.
pub path: String,
/// Brief human-readable description of the file's contents.
pub description: String,
/// The embedding dimension used to index this file.
pub dimensions: usize,
}
/// Adapter for sparc-style semantic file search.
///
/// sparc needs to locate relevant source files given a natural-language query
/// string. This adapter stores one embedding per file (derived externally,
/// e.g. from an ONNX all-MiniLM model) and retrieves the closest matches
/// using HNSW approximate nearest-neighbour search.
///
/// The adapter never embeds text itself: both the per-file embeddings and the
/// query embedding must be produced by the caller.
///
/// # Example
/// ```no_run
/// use ruvector_core::integration::SemanticSearchAdapter;
///
/// let adapter = SemanticSearchAdapter::new(384, "./sparc_index.db").unwrap();
///
/// // Index source files (embeddings produced by your embedding pipeline)
/// adapter.index_file("src/auth/service.rs", "authentication service", &[0.0f32; 384]).unwrap();
/// adapter.index_file("src/user/model.rs", "user data model", &[0.1f32; 384]).unwrap();
///
/// // Query with a natural-language description and its embedding
/// let results = adapter.search("jwt token validation", &[0.05f32; 384], 5).unwrap();
/// for r in results {
/// println!(" {} (score={:.4})", r.id, r.score);
/// }
/// ```
pub struct SemanticSearchAdapter {
// Backing vector database holding one entry per indexed file.
db: VectorDB,
// Embedding length checked by `index_file` and `search` before they
// touch the database.
dimensions: usize,
}
impl SemanticSearchAdapter {
    /// Open an adapter backed by a RuVector database at `storage_path`.
    ///
    /// `dimensions` is the embedding size of the model in use (e.g. 384 for
    /// all-MiniLM-L6-v2, 768 for BERT-base). The database uses cosine
    /// distance, no quantization, and an HNSW index with `m = 16`,
    /// `ef_construction = 100`, `ef_search = 100` and
    /// `max_elements = 500_000`.
    ///
    /// # Errors
    /// Propagates any error returned by `VectorDB::new`.
    pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> {
        let hnsw = HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 100,
            max_elements: 500_000,
        };
        let db = VectorDB::new(DbOptions {
            dimensions,
            distance_metric: DistanceMetric::Cosine,
            storage_path: storage_path.into(),
            hnsw_config: Some(hnsw),
            quantization: None,
        })?;
        Ok(Self { db, dimensions })
    }

    /// Add one source file to the index.
    ///
    /// The file `path` doubles as the vector ID. Both `path` and
    /// `description` are also recorded in the entry's metadata under the
    /// keys `"path"` and `"description"` for debugging / display.
    ///
    /// # Errors
    /// Returns `RuvectorError::DimensionMismatch` when `embedding.len()`
    /// differs from the adapter's `dimensions`; otherwise propagates any
    /// error from `VectorDB::insert`.
    pub fn index_file(
        &self,
        path: impl Into<String>,
        description: impl Into<String>,
        embedding: &[f32],
    ) -> Result<String> {
        let path_str = path.into();
        self.ensure_dimensions(embedding.len())?;

        let metadata = HashMap::from([
            (
                "description".to_string(),
                serde_json::Value::String(description.into()),
            ),
            (
                "path".to_string(),
                serde_json::Value::String(path_str.clone()),
            ),
        ]);

        let entry = VectorEntry {
            id: Some(path_str),
            vector: embedding.to_vec(),
            metadata: Some(metadata),
        };
        self.db.insert(entry)
    }

    /// Drop the entry stored under `path`.
    ///
    /// The returned flag is the one reported by `VectorDB::delete`.
    pub fn remove_file(&self, path: &str) -> Result<bool> {
        self.db.delete(path)
    }

    /// Find the indexed files closest to `query_embedding`.
    ///
    /// Yields up to `k` results sorted by ascending cosine distance (most
    /// relevant first). Each [`SearchResult`] has `.id` set to the file path
    /// and `.metadata` containing the description.
    ///
    /// `_query_text` is currently ignored; only the embedding drives the
    /// search.
    ///
    /// # Errors
    /// Returns `RuvectorError::DimensionMismatch` when
    /// `query_embedding.len()` differs from the adapter's `dimensions`;
    /// otherwise propagates any error from `VectorDB::search`.
    pub fn search(
        &self,
        _query_text: &str,
        query_embedding: &[f32],
        k: usize,
    ) -> Result<Vec<SearchResult>> {
        self.ensure_dimensions(query_embedding.len())?;
        let query = SearchQuery {
            vector: query_embedding.to_vec(),
            k,
            filter: None,
            ef_search: None,
        };
        self.db.search(query)
    }

    /// Number of files currently in the index.
    pub fn len(&self) -> Result<usize> {
        self.db.len()
    }

    /// Whether the index currently holds no files.
    pub fn is_empty(&self) -> Result<bool> {
        self.db.is_empty()
    }

    /// All keys stored in the database, i.e. the indexed file paths.
    pub fn list_files(&self) -> Result<Vec<String>> {
        self.db.keys()
    }

    /// Reject embeddings whose length `actual` differs from the configured
    /// dimension.
    fn ensure_dimensions(&self, actual: usize) -> Result<()> {
        if actual == self.dimensions {
            Ok(())
        } else {
            Err(RuvectorError::DimensionMismatch {
                expected: self.dimensions,
                actual,
            })
        }
    }
}
// ── Shared utility ──────────────────────────────────────────────────────────
/// Scale `v` to unit Euclidean length for cosine-distance workloads.
///
/// When the *squared* norm of `v` is below `f32::EPSILON` (all-zero, empty,
/// or very small vectors) the input is returned as an unchanged copy, which
/// avoids dividing by zero.
#[inline]
pub fn normalize(v: &[f32]) -> Vec<f32> {
    let mut squared_norm = 0.0f32;
    for &component in v {
        squared_norm += component * component;
    }

    // Keep the `<` comparison: a NaN norm must fall through to the division
    // branch, exactly as a guard-clause formulation would.
    if squared_norm < f32::EPSILON {
        v.to_vec()
    } else {
        let length = squared_norm.sqrt();
        v.iter().map(|&component| component / length).collect()
    }
}
/// Compute the cosine similarity of two vectors, in the range `[-1, 1]`.
///
/// Both inputs are treated as raw (un-normalised) vectors; the function
/// divides their dot product by the product of their Euclidean norms.
///
/// Returns `0.0` when the product of the two norms is not greater than
/// `f32::EPSILON`. That covers empty slices and all-zero vectors, and also
/// vectors of very small magnitude.
///
/// The slices are expected to have equal length. This is checked with a
/// `debug_assert!` only; when debug assertions are disabled, a mismatch is
/// not detected and only the overlapping prefix is compared.
#[inline]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len(), "cosine_similarity: length mismatch");
    let (mut dot, mut norm_a, mut norm_b) = (0.0f32, 0.0f32, 0.0f32);
    for (&ai, &bi) in a.iter().zip(b.iter()) {
        dot += ai * bi;
        norm_a += ai * ai;
        norm_b += bi * bi;
    }
    let denom = norm_a.sqrt() * norm_b.sqrt();
    if denom > f32::EPSILON {
        // `sqrt(x) * sqrt(x)` can round slightly below `x`, so the raw ratio
        // for (anti-)parallel vectors may land marginally outside [-1, 1].
        // Clamp so the documented range always holds.
        (dot / denom).clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Euclidean length of `v`, used to check normalisation results.
    fn l2_norm(v: &[f32]) -> f32 {
        v.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    /// Path of the database file `file_name` inside `dir`, as a `String`.
    fn db_path(dir: &std::path::Path, file_name: &str) -> String {
        dir.join(file_name).to_string_lossy().to_string()
    }

    /// Normalising a 3-4-5 vector must produce unit length.
    #[test]
    fn test_normalize_unit_vector() {
        let normalized = normalize(&[3.0f32, 4.0]);
        let norm = l2_norm(&normalized);
        assert!(
            (norm - 1.0).abs() < 1e-6,
            "Expected unit norm, got {}",
            norm
        );
    }

    /// The zero vector is passed through untouched.
    #[test]
    fn test_normalize_zero_vector() {
        let zeros = vec![0.0f32; 3];
        assert_eq!(
            normalize(&zeros),
            zeros,
            "Zero vector should be returned unchanged"
        );
    }

    /// A vector compared with itself has similarity 1.
    #[test]
    fn test_cosine_similarity_identical() {
        let v = [1.0f32, 2.0, 3.0];
        let sim = cosine_similarity(&v, &v);
        assert!(
            (sim - 1.0).abs() < 1e-5,
            "Identical vectors: expected 1.0, got {}",
            sim
        );
    }

    /// Perpendicular axes have similarity 0.
    #[test]
    fn test_cosine_similarity_orthogonal() {
        let x_axis = [1.0f32, 0.0];
        let y_axis = [0.0f32, 1.0];
        let sim = cosine_similarity(&x_axis, &y_axis);
        assert!(
            sim.abs() < 1e-5,
            "Orthogonal vectors: expected 0.0, got {}",
            sim
        );
    }

    /// Index three files, then query with the first file's own embedding.
    #[test]
    fn test_semantic_search_adapter_roundtrip() {
        let dir = tempdir().unwrap();
        let adapter = SemanticSearchAdapter::new(4, db_path(dir.path(), "sparc.db")).unwrap();

        // hnsw_rs requires at least 2 elements before searching.
        let files = [
            ("src/auth.rs", "authentication", [1.0f32, 0.0, 0.0, 0.0]),
            ("src/user.rs", "user model", [0.0, 1.0, 0.0, 0.0]),
            ("src/storage.rs", "storage layer", [0.0, 0.0, 1.0, 0.0]),
        ];
        for &(path, description, raw) in files.iter() {
            adapter
                .index_file(path, description, &normalize(&raw))
                .unwrap();
        }
        assert_eq!(adapter.len().unwrap(), 3);

        // Query close to the first embedding — should return src/auth.rs first
        let query = normalize(&files[0].2);
        let results = adapter.search("auth", &query, 2).unwrap();
        assert!(!results.is_empty());
        assert_eq!(results[0].id, "src/auth.rs");
    }

    /// Store three layers, then look up the nearest neighbour of the first.
    #[test]
    fn test_fann_adapter_store_and_retrieve() {
        let dir = tempdir().unwrap();
        let adapter = FannAdapter::new(4, db_path(dir.path(), "fann.db")).unwrap();

        // hnsw_rs requires at least 2 elements before searching.
        let layers = [
            ("model_v1/layer_0", [1.0f32, 1.0, 0.0, 0.0]),
            ("model_v1/layer_1", [0.0, 0.0, 1.0, 1.0]),
            ("model_v1/layer_2", [1.0, 0.0, 1.0, 0.0]),
        ];
        for &(layer_id, raw) in layers.iter() {
            adapter
                .store_layer(layer_id, &normalize(&raw), None)
                .unwrap();
        }

        let query = normalize(&layers[0].1);
        let results = adapter.find_similar_layers(&query, 1).unwrap();
        assert!(!results.is_empty());
        assert_eq!(results[0].id, "model_v1/layer_0");
    }
}

View file

@ -73,6 +73,12 @@ pub mod memory;
/// Advanced techniques: hypergraphs, learned indexes, neural hashing, TDA (Phase 6)
pub mod advanced;
/// Cross-integration helpers for the ruvnet crate ecosystem.
///
/// Provides [`integration::FannAdapter`] for ruv-FANN layer-embedding storage
/// and [`integration::SemanticSearchAdapter`] for sparc semantic file search.
pub mod integration;
// Re-exports
pub use advanced_features::{
fuse_rankings, ConformalConfig, ConformalPredictor, EnhancedPQ, FilterExpression,

View file

@ -87,10 +87,13 @@ pub struct HnswConfig {
impl Default for HnswConfig {
    /// Default HNSW parameters: `m = 16`, `ef_construction = 100`,
    /// `ef_search = 100`, `max_elements = 1_000_000`.
    ///
    /// Each field is specified exactly once; the superseded values
    /// (`m = 32`, `ef_construction = 200`, `max_elements = 10_000_000`) are
    /// gone, since listing a field twice in a struct literal does not compile.
    fn default() -> Self {
        Self {
            m: 16,
            ef_construction: 100,
            ef_search: 100,
            // 1M is a reasonable default that avoids excessive upfront memory
            // allocation while still being suitable for production workloads.
            // Callers building large indexes should set this explicitly.
            max_elements: 1_000_000,
        }
    }
}

View file

@ -493,3 +493,116 @@ fn test_hnsw_parallel_batch_insert() -> Result<()> {
Ok(())
}
// ── New tests covering correctness fixes ────────────────────────────────────
/// A `k = 0` search on a non-empty index must come back empty instead of
/// panicking.
#[test]
fn test_hnsw_search_k_zero() -> Result<()> {
    let origin = vec![0.0f32; 32];
    let mut index = HnswIndex::new(
        32,
        DistanceMetric::Euclidean,
        HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 50,
            max_elements: 1000,
        },
    )?;
    index.add("v0".to_string(), origin.clone())?;

    let results = index.search(&origin, 0)?;
    assert!(results.is_empty(), "k=0 must return empty results");
    Ok(())
}
/// Search results must be ordered by non-decreasing score (distance).
#[test]
fn test_hnsw_results_sorted_ascending() -> Result<()> {
    let (dimensions, num_vectors, k) = (64, 200, 20);
    let mut index = HnswIndex::new(
        dimensions,
        DistanceMetric::Euclidean,
        HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 100,
            max_elements: 1000,
        },
    )?;

    let vectors = generate_random_vectors(num_vectors, dimensions, 31415);
    for (i, v) in vectors.iter().enumerate() {
        index.add(format!("v{}", i), v.clone())?;
    }

    // Query with the first inserted vector.
    let results = index.search(&vectors[0], k)?;
    assert!(!results.is_empty());

    // Compare every result with its successor.
    for (current, next) in results.iter().zip(results.iter().skip(1)) {
        assert!(
            current.score <= next.score,
            "Results not sorted: score[n]={} > score[n+1]={}",
            current.score,
            next.score
        );
    }
    Ok(())
}
/// `set_ef_search` must be reflected in the index's reported configuration.
#[test]
fn test_hnsw_set_ef_search_updates_config() -> Result<()> {
    let mut index = HnswIndex::new(
        32,
        DistanceMetric::Cosine,
        HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 50,
            max_elements: 1000,
        },
    )?;

    // Starts at the configured value...
    let before = index.config().ef_search;
    assert_eq!(before, 50);

    // ...and follows the setter afterwards.
    index.set_ef_search(200);
    let after = index.config().ef_search;
    assert_eq!(after, 200, "set_ef_search should update config.ef_search");
    Ok(())
}
/// Verify that `ef_search < k` is clamped to k rather than silently under-recalling.
///
/// The index holds 100 vectors and is configured with `ef_search = 5`; a
/// search for `k = 20` must still return 20 results. Asserting only
/// "non-empty" would also pass if the search stayed limited to 5 candidates,
/// so the result count is checked against `k` itself.
#[test]
fn test_hnsw_search_with_ef_clamps_to_k() -> Result<()> {
    let dimensions = 32;
    let num_vectors = 100;
    let k = 20;
    let config = HnswConfig {
        m: 16,
        ef_construction: 100,
        ef_search: 5, // intentionally lower than k
        max_elements: 1000,
    };
    let mut index = HnswIndex::new(dimensions, DistanceMetric::Euclidean, config)?;
    let vectors = generate_random_vectors(num_vectors, dimensions, 27182);
    for (i, v) in vectors.iter().enumerate() {
        index.add(format!("v{}", i), v.clone())?;
    }

    // search() uses the configured ef_search=5, which is < k=20. With the
    // clamp in place the effective ef is k, so all k neighbours come back.
    let results = index.search(&vectors[0], k)?;
    assert_eq!(
        results.len(),
        k,
        "search with ef_search < k must still return k results"
    );
    Ok(())
}