mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-23 12:55:26 +00:00
fix(core): HNSW correctness fixes, k=0 guard, sorted results, cross-integration helpers (v2.2.3) (#502)
* fix(core): correctness + safety fixes in HNSW/flat index + cross-integration helpers (v2.2.3) Correctness fixes: - hnsw: `DistanceFn::eval` now clamps distance to 0.0 — prevents hnsw_rs internal BinaryHeap assertion panic when floating-point rounding yields a marginally-negative cosine/euclidean distance for near-identical vectors - hnsw: `set_ef_search` was a silent no-op; now correctly writes to `config.ef_search` so callers can tune recall at query time - hnsw: `search_with_ef` clamps `ef_search` to `max(ef_search, k)` to prevent silent under-recall when ef_search < k (hnsw_rs constraint) - hnsw: `search_with_ef` now explicitly returns an empty slice for k=0 instead of forwarding to hnsw_rs which may panic - hnsw: `search_with_ef` returns early (empty slice) when index is empty to avoid hnsw_rs BinaryHeap `.peek().unwrap()` panic on zero-element index - hnsw: results are now explicitly sorted by ascending distance; hnsw_rs does not guarantee this order in all code paths - hnsw: deserialization rebuilds the HNSW graph in index order (sorted by idx) and uses an O(n) HashMap lookup instead of O(n^2) linear search over the vectors vec during restore - flat: added k=0 guard (returns empty slice, no panic) - flat: switched sort to `sort_unstable_by` with a `partial_cmp` fallback to handle NaN distances gracefully and improve throughput on large sets API improvement: - types: `HnswConfig::default()` now uses `max_elements=1_000_000` (was 10_000_000) and `m=16/ef_construction=100` to avoid excessive upfront memory allocation in the common case; large-index callers can still set `max_elements` explicitly New module: - integration: `FannAdapter` and `SemanticSearchAdapter` — thin wrappers that make ruvector-core directly usable from ruv-FANN (layer-embedding storage + retrieval) and sparc (semantic file search by embedding query). Includes `normalize()` and `cosine_similarity()` free-standing utilities. 
Tests (4 new integration, 3 new unit): - test_hnsw_search_k_zero: k=0 returns empty, no panic - test_hnsw_results_sorted_ascending: verifies window[i].score <= window[i+1].score - test_hnsw_set_ef_search_updates_config: set_ef_search writes through to config - test_hnsw_search_with_ef_clamps_to_k: ef < k still returns results - flat: test_flat_index_k_zero, test_flat_index_results_sorted - integration: FannAdapter and SemanticSearchAdapter roundtrip tests Version bump: 2.2.2 → 2.2.3 Co-Authored-By: claude-flow <ruv@ruv.net> * style: cargo fmt ruvector-core
This commit is contained in:
parent
5126ba418f
commit
e2350b759f
8 changed files with 771 additions and 151 deletions
254
Cargo.lock
generated
254
Cargo.lock
generated
|
|
@ -883,7 +883,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -892,7 +892,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1306,7 +1306,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1341,7 +1341,7 @@ dependencies = [
|
|||
"criterion 0.5.1",
|
||||
"libm",
|
||||
"proptest",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2418,7 +2418,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2886,7 +2886,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3861,7 +3861,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -4472,7 +4472,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -4969,7 +4969,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -5053,12 +5053,12 @@ dependencies = [
|
|||
"ruvector-consciousness",
|
||||
"ruvector-delta-core",
|
||||
"ruvector-domain-expansion",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"ruvector-nervous-system",
|
||||
"ruvector-solver",
|
||||
"ruvector-sona 0.2.0",
|
||||
"ruvector-sparsifier",
|
||||
"ruvllm 2.2.2",
|
||||
"ruvllm 2.2.3",
|
||||
"rvf-crypto",
|
||||
"rvf-federation",
|
||||
"rvf-runtime",
|
||||
|
|
@ -5410,7 +5410,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -6397,7 +6397,7 @@ dependencies = [
|
|||
"ruqu-algorithms",
|
||||
"ruvector-attention",
|
||||
"ruvector-cluster",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-delta-core",
|
||||
"ruvector-filter",
|
||||
"ruvector-gnn",
|
||||
|
|
@ -6451,7 +6451,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -7060,11 +7060,11 @@ dependencies = [
|
|||
"rkyv",
|
||||
"roaring",
|
||||
"ruvector-attention",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-gnn",
|
||||
"ruvector-graph",
|
||||
"ruvector-hyperbolic-hnsw",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"ruvector-nervous-system",
|
||||
"ruvector-raft",
|
||||
"ruvector-sona 0.2.0",
|
||||
|
|
@ -7989,7 +7989,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -8076,7 +8076,7 @@ dependencies = [
|
|||
"ndarray 0.16.1",
|
||||
"rand 0.8.5",
|
||||
"rand_distr 0.4.3",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -8320,7 +8320,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruqu"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"blake3",
|
||||
"cognitum-gate-tilezero 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
|
@ -8586,7 +8586,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-acorn"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"criterion 0.5.1",
|
||||
"rand 0.8.5",
|
||||
|
|
@ -8609,7 +8609,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-attention"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"approx",
|
||||
"criterion 0.5.1",
|
||||
|
|
@ -8624,7 +8624,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-attention-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"napi",
|
||||
"napi-build",
|
||||
|
|
@ -8656,7 +8656,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-attention-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"console_error_panic_hook",
|
||||
"getrandom 0.2.17",
|
||||
|
|
@ -8671,7 +8671,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-attn-mincut"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -8680,7 +8680,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-bench"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"byteorder",
|
||||
|
|
@ -8701,8 +8701,8 @@ dependencies = [
|
|||
"rayon",
|
||||
"ruvector-cognitive-container",
|
||||
"ruvector-coherence",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"statistical",
|
||||
|
|
@ -8731,7 +8731,7 @@ dependencies = [
|
|||
"rand_distr 0.4.3",
|
||||
"rayon",
|
||||
"reqwest 0.12.28",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"rvf-crypto",
|
||||
"rvf-types",
|
||||
"rvf-wire",
|
||||
|
|
@ -8748,7 +8748,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-cli"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
|
|
@ -8773,7 +8773,7 @@ dependencies = [
|
|||
"predicates",
|
||||
"prettytable-rs",
|
||||
"rand 0.8.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-gnn",
|
||||
"ruvector-graph",
|
||||
"serde",
|
||||
|
|
@ -8806,7 +8806,7 @@ dependencies = [
|
|||
"rand_distr 0.4.3",
|
||||
"rayon",
|
||||
"ruvector-attention",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-gnn",
|
||||
"ruvector-graph",
|
||||
"serde",
|
||||
|
|
@ -8822,7 +8822,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-cluster"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bincode 2.0.1",
|
||||
|
|
@ -8831,7 +8831,7 @@ dependencies = [
|
|||
"futures",
|
||||
"parking_lot 0.12.5",
|
||||
"rand 0.8.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -8842,7 +8842,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-cnn"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"criterion 0.5.1",
|
||||
"fastrand",
|
||||
|
|
@ -8870,7 +8870,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-cognitive-container"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"proptest",
|
||||
"serde",
|
||||
|
|
@ -8880,7 +8880,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-coherence"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -8888,14 +8888,14 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-collections"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"bincode 2.0.1",
|
||||
"chrono",
|
||||
"criterion 0.5.1",
|
||||
"dashmap 6.1.0",
|
||||
"parking_lot 0.12.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -8904,7 +8904,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-consciousness"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"approx",
|
||||
"criterion 0.5.1",
|
||||
|
|
@ -8916,7 +8916,7 @@ dependencies = [
|
|||
"ruvector-cognitive-container",
|
||||
"ruvector-coherence",
|
||||
"ruvector-math",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"ruvector-solver",
|
||||
"ruvector-sparsifier",
|
||||
"serde",
|
||||
|
|
@ -8926,7 +8926,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-consciousness-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"getrandom 0.2.17",
|
||||
"js-sys",
|
||||
|
|
@ -8992,7 +8992,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-core"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode 2.0.1",
|
||||
|
|
@ -9033,7 +9033,7 @@ dependencies = [
|
|||
"approx",
|
||||
"ruvector-attention",
|
||||
"ruvector-gnn",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 1.0.69",
|
||||
|
|
@ -9041,7 +9041,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-dag"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"criterion 0.5.1",
|
||||
"crossbeam",
|
||||
|
|
@ -9053,7 +9053,7 @@ dependencies = [
|
|||
"pqcrypto-kyber",
|
||||
"proptest",
|
||||
"rand 0.8.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2 0.10.9",
|
||||
|
|
@ -9078,7 +9078,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-decompiler"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"criterion 0.5.1",
|
||||
"memchr",
|
||||
|
|
@ -9087,7 +9087,7 @@ dependencies = [
|
|||
"ort",
|
||||
"rayon",
|
||||
"regex",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha3",
|
||||
|
|
@ -9096,7 +9096,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-decompiler-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"console_error_panic_hook",
|
||||
"getrandom 0.2.17",
|
||||
|
|
@ -9200,7 +9200,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-diskann"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"bincode 2.0.1",
|
||||
"bytemuck",
|
||||
|
|
@ -9217,7 +9217,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-diskann-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"napi",
|
||||
"napi-build",
|
||||
|
|
@ -9238,7 +9238,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-domain-expansion"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"criterion 0.5.1",
|
||||
"proptest",
|
||||
|
|
@ -9281,7 +9281,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-exotic-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"console_error_panic_hook",
|
||||
"getrandom 0.2.17",
|
||||
|
|
@ -9297,12 +9297,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-filter"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"dashmap 6.1.0",
|
||||
"ordered-float",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -9348,7 +9348,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-gnn"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"criterion 0.5.1",
|
||||
|
|
@ -9364,7 +9364,7 @@ dependencies = [
|
|||
"rand 0.8.5",
|
||||
"rand_distr 0.4.3",
|
||||
"rayon",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
|
|
@ -9373,7 +9373,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-gnn-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"napi",
|
||||
"napi-build",
|
||||
|
|
@ -9384,7 +9384,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-gnn-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"console_error_panic_hook",
|
||||
"getrandom 0.2.17",
|
||||
|
|
@ -9399,7 +9399,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-graph"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode 2.0.1",
|
||||
|
|
@ -9439,7 +9439,7 @@ dependencies = [
|
|||
"rkyv",
|
||||
"roaring",
|
||||
"ruvector-cluster",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-raft",
|
||||
"ruvector-replication",
|
||||
"serde",
|
||||
|
|
@ -9460,14 +9460,14 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-graph-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"futures",
|
||||
"napi",
|
||||
"napi-build",
|
||||
"napi-derive",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-graph",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -9479,14 +9479,14 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-graph-transformer"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"proptest",
|
||||
"rand 0.8.5",
|
||||
"ruvector-attention",
|
||||
"ruvector-coherence",
|
||||
"ruvector-gnn",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"ruvector-solver",
|
||||
"ruvector-verified",
|
||||
"serde",
|
||||
|
|
@ -9495,7 +9495,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-graph-transformer-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"napi",
|
||||
"napi-build",
|
||||
|
|
@ -9507,7 +9507,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-graph-transformer-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"serde",
|
||||
|
|
@ -9519,7 +9519,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-graph-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"console_error_panic_hook",
|
||||
|
|
@ -9528,7 +9528,7 @@ dependencies = [
|
|||
"js-sys",
|
||||
"parking_lot 0.12.5",
|
||||
"regex",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-graph",
|
||||
"serde",
|
||||
"serde-wasm-bindgen",
|
||||
|
|
@ -9553,7 +9553,7 @@ dependencies = [
|
|||
"criterion 0.5.1",
|
||||
"hailort-sys",
|
||||
"proptest",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde_json",
|
||||
"sha2 0.10.9",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -9573,10 +9573,10 @@ dependencies = [
|
|||
"prost",
|
||||
"protoc-bin-vendored",
|
||||
"rcgen",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-hailo",
|
||||
"ruvector-mmwave",
|
||||
"ruvllm 2.2.2",
|
||||
"ruvllm 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2 0.10.9",
|
||||
|
|
@ -9641,7 +9641,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-math"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"approx",
|
||||
"criterion 0.5.1",
|
||||
|
|
@ -9656,7 +9656,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-math-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"console_error_panic_hook",
|
||||
"getrandom 0.2.17",
|
||||
|
|
@ -9674,7 +9674,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-metrics"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"lazy_static",
|
||||
|
|
@ -9729,7 +9729,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-mincut"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"criterion 0.5.1",
|
||||
|
|
@ -9743,7 +9743,7 @@ dependencies = [
|
|||
"rand 0.8.5",
|
||||
"rayon",
|
||||
"roaring",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-graph",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -9788,24 +9788,24 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-mincut-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"napi",
|
||||
"napi-build",
|
||||
"napi-derive",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ruvector-mincut-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"console_error_panic_hook",
|
||||
"getrandom 0.2.17",
|
||||
"js-sys",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
"serde",
|
||||
"serde-wasm-bindgen",
|
||||
"serde_json",
|
||||
|
|
@ -9819,7 +9819,7 @@ version = "0.0.1"
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-nervous-system"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"approx",
|
||||
|
|
@ -9853,14 +9853,14 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"napi",
|
||||
"napi-build",
|
||||
"napi-derive",
|
||||
"ruvector-collections",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-filter",
|
||||
"ruvector-metrics",
|
||||
"serde",
|
||||
|
|
@ -9872,7 +9872,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-profiler"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -9881,7 +9881,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-rabitq"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"criterion 0.5.1",
|
||||
"rand 0.8.5",
|
||||
|
|
@ -9908,7 +9908,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-raft"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"bincode 2.0.1",
|
||||
"chrono",
|
||||
|
|
@ -9916,7 +9916,7 @@ dependencies = [
|
|||
"futures",
|
||||
"parking_lot 0.12.5",
|
||||
"rand 0.8.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -9936,7 +9936,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-replication"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"bincode 2.0.1",
|
||||
"chrono",
|
||||
|
|
@ -9944,7 +9944,7 @@ dependencies = [
|
|||
"futures",
|
||||
"parking_lot 0.12.5",
|
||||
"rand 0.8.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -9979,7 +9979,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-router-cli"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
|
|
@ -9994,7 +9994,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-router-core"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode 2.0.1",
|
||||
|
|
@ -10021,7 +10021,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-router-ffi"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
|
|
@ -10036,7 +10036,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-router-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"ruvector-router-core",
|
||||
|
|
@ -10050,7 +10050,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-rulake"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"hex",
|
||||
"rand 0.8.5",
|
||||
|
|
@ -10065,7 +10065,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-scipix"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"ab_glyph",
|
||||
"anyhow",
|
||||
|
|
@ -10138,12 +10138,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-server"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"axum 0.7.9",
|
||||
"dashmap 6.1.0",
|
||||
"parking_lot 0.12.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -10156,13 +10156,13 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-snapshot"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bincode 2.0.1",
|
||||
"chrono",
|
||||
"flate2",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2 0.10.9",
|
||||
|
|
@ -10173,7 +10173,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-solver"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"approx",
|
||||
"criterion 0.5.1",
|
||||
|
|
@ -10192,7 +10192,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-solver-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"napi",
|
||||
"napi-build",
|
||||
|
|
@ -10205,7 +10205,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-solver-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"getrandom 0.2.17",
|
||||
"js-sys",
|
||||
|
|
@ -10255,7 +10255,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-sparse-inference"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"byteorder",
|
||||
|
|
@ -10278,7 +10278,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-sparsifier"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"approx",
|
||||
"criterion 0.5.1",
|
||||
|
|
@ -10296,7 +10296,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-sparsifier-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"console_error_panic_hook",
|
||||
"getrandom 0.2.17",
|
||||
|
|
@ -10311,11 +10311,11 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-temporal-tensor"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
|
||||
[[package]]
|
||||
name = "ruvector-tiny-dancer-core"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytemuck",
|
||||
|
|
@ -10345,7 +10345,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-tiny-dancer-node"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
|
|
@ -10362,7 +10362,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-tiny-dancer-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"ruvector-tiny-dancer-core",
|
||||
|
|
@ -10383,7 +10383,7 @@ dependencies = [
|
|||
"proptest",
|
||||
"ruvector-cognitive-container",
|
||||
"ruvector-coherence",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
|
@ -10405,7 +10405,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvector-wasm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64 0.22.1",
|
||||
|
|
@ -10418,7 +10418,7 @@ dependencies = [
|
|||
"parking_lot 0.12.5",
|
||||
"rand 0.8.5",
|
||||
"ruvector-collections",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-filter",
|
||||
"serde",
|
||||
"serde-wasm-bindgen",
|
||||
|
|
@ -10650,7 +10650,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvllm"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
|
|
@ -10680,7 +10680,7 @@ dependencies = [
|
|||
"rayon",
|
||||
"regex",
|
||||
"ruvector-attention",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-gnn",
|
||||
"ruvector-graph",
|
||||
"ruvector-sona 0.2.0",
|
||||
|
|
@ -10700,7 +10700,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ruvllm-cli"
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
|
|
@ -10720,7 +10720,7 @@ dependencies = [
|
|||
"predicates",
|
||||
"prettytable-rs",
|
||||
"rustyline",
|
||||
"ruvllm 2.2.2",
|
||||
"ruvllm 2.2.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
|
|
@ -11053,7 +11053,7 @@ dependencies = [
|
|||
"rand_distr 0.4.3",
|
||||
"ruvector-attention",
|
||||
"ruvector-collections",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-dag",
|
||||
"ruvector-filter",
|
||||
"ruvector-gnn",
|
||||
|
|
@ -11167,7 +11167,7 @@ dependencies = [
|
|||
"js-sys",
|
||||
"once_cell",
|
||||
"parking_lot 0.12.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"rvf-runtime",
|
||||
"rvf-types",
|
||||
"serde",
|
||||
|
|
@ -11258,7 +11258,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -11267,7 +11267,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -11406,7 +11406,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -11415,7 +11415,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -12033,7 +12033,7 @@ name = "subpolynomial-time-mincut-demo"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -12256,7 +12256,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -12949,7 +12949,7 @@ name = "train-discoveries"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-core 2.2.2",
|
||||
"ruvector-core 2.2.3",
|
||||
"ruvector-solver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -13369,7 +13369,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -13635,7 +13635,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ruvector-coherence",
|
||||
"ruvector-mincut 2.2.2",
|
||||
"ruvector-mincut 2.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -237,7 +237,7 @@ members = [
|
|||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "2.2.2"
|
||||
version = "2.2.3"
|
||||
edition = "2021"
|
||||
rust-version = "1.77"
|
||||
license = "MIT"
|
||||
|
|
|
|||
|
|
@ -34,6 +34,10 @@ impl VectorIndex for FlatIndex {
|
|||
}
|
||||
|
||||
fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
|
||||
if k == 0 {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
// Distance calculation - parallel on native, sequential on WASM
|
||||
#[cfg(all(feature = "parallel", not(target_arch = "wasm32")))]
|
||||
let mut results: Vec<_> = self
|
||||
|
|
@ -60,8 +64,9 @@ impl VectorIndex for FlatIndex {
|
|||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
// Sort by distance and take top k
|
||||
results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
|
||||
// Sort by distance (ascending — closest first) and take top k.
|
||||
// Use sort_unstable_by for better performance on large result sets.
|
||||
results.sort_unstable_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
results.truncate(k);
|
||||
|
||||
Ok(results
|
||||
|
|
@ -105,4 +110,40 @@ mod tests {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Asking a populated flat index for zero neighbours must yield an empty
/// result set rather than an error or a panic.
#[test]
fn test_flat_index_k_zero() -> Result<()> {
    let probe = [1.0f32, 0.0, 0.0];

    let mut flat = FlatIndex::new(3, DistanceMetric::Euclidean);
    flat.add("v1".to_string(), probe.to_vec())?;

    let hits = flat.search(&probe, 0)?;
    assert!(hits.is_empty(), "k=0 must return empty results");

    Ok(())
}
|
||||
|
||||
/// Search results from the flat index must come back ordered by ascending
/// score, with the closest stored vector first.
#[test]
fn test_flat_index_results_sorted() -> Result<()> {
    let mut flat = FlatIndex::new(3, DistanceMetric::Euclidean);

    // Points on the x-axis at x = 1..=10, i.e. at increasing distance from the origin.
    for step in 1usize..=10 {
        let id = format!("v{}", step);
        flat.add(id, vec![step as f32, 0.0, 0.0])?;
    }

    let origin = [0.0f32, 0.0, 0.0];
    let hits = flat.search(&origin, 5)?;

    assert_eq!(hits.len(), 5);
    for pair in hits.windows(2) {
        let (nearer, farther) = (&pair[0], &pair[1]);
        assert!(
            nearer.score <= farther.score,
            "Results must be sorted ascending by distance"
        );
    }
    // The x = 1 point is the nearest one to the origin.
    assert_eq!(hits[0].id, "v1");

    Ok(())
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,11 @@ impl DistanceFn {
|
|||
|
||||
impl Distance<f32> for DistanceFn {
|
||||
fn eval(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
distance(a, b, self.metric).unwrap_or(f32::MAX)
|
||||
// hnsw_rs asserts `dist_to_ref >= 0` in its search loop. Clamp any
|
||||
// tiny negative values caused by floating-point rounding (e.g. cosine
|
||||
// distance between two nearly-identical normalised vectors can be
|
||||
// marginally below zero). f32::MAX is the safe sentinel for errors.
|
||||
distance(a, b, self.metric).unwrap_or(f32::MAX).max(0.0)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -126,10 +130,12 @@ impl HnswIndex {
|
|||
&self.config
|
||||
}
|
||||
|
||||
/// Set efSearch parameter for query-time accuracy tuning
|
||||
pub fn set_ef_search(&mut self, _ef_search: usize) {
|
||||
// Note: hnsw_rs controls ef_search via the search method's knbn parameter
|
||||
// We store it in config and use it in search_with_ef
|
||||
/// Set efSearch parameter for query-time accuracy tuning.
|
||||
///
|
||||
/// Higher values increase recall at the cost of search latency.
|
||||
/// Typical range: 50–500. Must be >= k for meaningful results.
|
||||
pub fn set_ef_search(&mut self, ef_search: usize) {
|
||||
self.config.ef_search = ef_search;
|
||||
}
|
||||
|
||||
/// Serialize the index to bytes using bincode
|
||||
|
|
@ -197,17 +203,27 @@ impl HnswIndex {
|
|||
distance_fn,
|
||||
);
|
||||
|
||||
// Rebuild the index by inserting all vectors
|
||||
// Rebuild the index by inserting all vectors.
|
||||
// Build a HashMap first to avoid O(n^2) linear search in the loop below.
|
||||
let vectors_lookup: std::collections::HashMap<&str, &Vec<f32>> = state
|
||||
.vectors
|
||||
.iter()
|
||||
.map(|(id, v)| (id.as_str(), v))
|
||||
.collect();
|
||||
|
||||
let id_to_idx: DashMap<VectorId, usize> = state.id_to_idx.into_iter().collect();
|
||||
let idx_to_id: DashMap<usize, VectorId> = state.idx_to_id.into_iter().collect();
|
||||
|
||||
// Insert vectors into HNSW in order
|
||||
for entry in idx_to_id.iter() {
|
||||
let idx = *entry.key();
|
||||
let id = entry.value();
|
||||
if let Some(vector) = state.vectors.iter().find(|(vid, _)| vid == id) {
|
||||
// Use insert_data method with slice and idx
|
||||
hnsw.insert_data(&vector.1, idx);
|
||||
// Insert vectors into HNSW in index order for deterministic reconstruction.
|
||||
let mut sorted_entries: Vec<_> = idx_to_id
|
||||
.iter()
|
||||
.map(|e| (*e.key(), e.value().clone()))
|
||||
.collect();
|
||||
sorted_entries.sort_unstable_by_key(|(idx, _)| *idx);
|
||||
|
||||
for (idx, id) in &sorted_entries {
|
||||
if let Some(vector) = vectors_lookup.get(id.as_str()) {
|
||||
hnsw.insert_data(vector, *idx);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -227,7 +243,11 @@ impl HnswIndex {
|
|||
})
|
||||
}
|
||||
|
||||
/// Search with custom efSearch parameter
|
||||
/// Search with custom efSearch parameter.
|
||||
///
|
||||
/// `ef_search` must be >= `k`; values smaller than `k` are clamped to `k`
|
||||
/// to avoid silent under-recall. Results are returned sorted by ascending
|
||||
/// distance (closest first).
|
||||
pub fn search_with_ef(
|
||||
&self,
|
||||
query: &[f32],
|
||||
|
|
@ -241,12 +261,27 @@ impl HnswIndex {
|
|||
});
|
||||
}
|
||||
|
||||
if k == 0 {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let inner = self.inner.read();
|
||||
|
||||
// Use HNSW search with custom ef parameter (knbn)
|
||||
let neighbors = inner.hnsw.search(query, k, ef_search);
|
||||
// hnsw_rs panics in its BinaryHeap traversal when the index is empty
|
||||
// or contains only a single element (the candidate/return-point loop
|
||||
// calls .peek().unwrap() without an emptiness guard). Return early
|
||||
// to surface a clean error instead of an assertion panic.
|
||||
if inner.vectors.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
Ok(neighbors
|
||||
// ef_search < k causes hnsw_rs to return fewer than k candidates; clamp.
|
||||
let effective_ef = ef_search.max(k);
|
||||
|
||||
// Use HNSW search with custom ef parameter (knbn)
|
||||
let neighbors = inner.hnsw.search(query, k, effective_ef);
|
||||
|
||||
let mut results: Vec<SearchResult> = neighbors
|
||||
.into_iter()
|
||||
.filter_map(|neighbor| {
|
||||
inner.idx_to_id.get(&neighbor.d_id).map(|id| SearchResult {
|
||||
|
|
@ -256,7 +291,16 @@ impl HnswIndex {
|
|||
metadata: None,
|
||||
})
|
||||
})
|
||||
.collect())
|
||||
.collect();
|
||||
|
||||
// hnsw_rs does not guarantee sort order — ensure ascending distance.
|
||||
results.sort_unstable_by(|a, b| {
|
||||
a.score
|
||||
.partial_cmp(&b.score)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
413
crates/ruvector-core/src/integration.rs
Normal file
413
crates/ruvector-core/src/integration.rs
Normal file
|
|
@ -0,0 +1,413 @@
|
|||
//! Cross-integration helpers for ruvnet crate ecosystem.
|
||||
//!
|
||||
//! This module provides ergonomic adapters that make it straightforward to use
|
||||
//! `ruvector-core` as a dependency from other ruvnet crates:
|
||||
//!
|
||||
//! - **ruv-FANN**: neural-network weights can be stored and retrieved via
|
||||
//! [`FannAdapter`] using cosine similarity search across layer embeddings.
|
||||
//! - **sparc / semantic file search**: [`SemanticSearchAdapter`] wraps
|
||||
//! [`VectorDB`] with file-path metadata so sparc can locate relevant source
|
||||
//! files by embedding query strings.
|
||||
//!
|
||||
//! Both adapters are thin, zero-overhead wrappers — they own no additional
|
||||
//! memory beyond what the underlying [`VectorDB`] already holds.
|
||||
|
||||
use crate::error::{Result, RuvectorError};
|
||||
use crate::types::{DbOptions, DistanceMetric, HnswConfig, SearchQuery, SearchResult, VectorEntry};
|
||||
use crate::vector_db::VectorDB;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ── ruv-FANN integration ────────────────────────────────────────────────────
|
||||
|
||||
/// Adapter that lets ruv-FANN store and retrieve layer-weight embeddings.
|
||||
///
|
||||
/// Each neural-network layer can be fingerprinted as a flat `f32` embedding
|
||||
/// (e.g. the flattened weight matrix or its PCA projection). Storing these
|
||||
/// fingerprints in RuVector enables fast recall of "similar layers" across
|
||||
/// model checkpoints.
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_core::integration::FannAdapter;
|
||||
///
|
||||
/// let mut adapter = FannAdapter::new(128, "./fann_index.db").unwrap();
|
||||
/// adapter.store_layer("model_v1/layer_0", &[0.1f32; 128], None).unwrap();
|
||||
/// let similar = adapter.find_similar_layers(&[0.1f32; 128], 5).unwrap();
|
||||
/// ```
|
||||
pub struct FannAdapter {
|
||||
db: VectorDB,
|
||||
}
|
||||
|
||||
impl FannAdapter {
|
||||
/// Create a new adapter backed by a RuVector database.
|
||||
///
|
||||
/// `dimensions` must match the size of the layer embeddings you intend
|
||||
/// to store. Cosine distance is used because weight embeddings are
|
||||
/// typically meaningful up to scale.
|
||||
pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> {
|
||||
let options = DbOptions {
|
||||
dimensions,
|
||||
distance_metric: DistanceMetric::Cosine,
|
||||
storage_path: storage_path.into(),
|
||||
hnsw_config: Some(HnswConfig {
|
||||
m: 16,
|
||||
ef_construction: 100,
|
||||
ef_search: 100,
|
||||
max_elements: 100_000,
|
||||
}),
|
||||
quantization: None,
|
||||
};
|
||||
Ok(Self {
|
||||
db: VectorDB::new(options)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Store a layer embedding identified by `layer_id`.
|
||||
///
|
||||
/// `metadata` can carry arbitrary JSON-serialisable key-value pairs
|
||||
/// (e.g. model name, checkpoint step, layer type).
|
||||
pub fn store_layer(
|
||||
&self,
|
||||
layer_id: impl Into<String>,
|
||||
embedding: &[f32],
|
||||
metadata: Option<HashMap<String, serde_json::Value>>,
|
||||
) -> Result<String> {
|
||||
let id = layer_id.into();
|
||||
self.db.insert(VectorEntry {
|
||||
id: Some(id),
|
||||
vector: embedding.to_vec(),
|
||||
metadata,
|
||||
})
|
||||
}
|
||||
|
||||
/// Find the `k` most similar layer embeddings to `query`.
|
||||
///
|
||||
/// Returns results sorted by ascending cosine distance.
|
||||
pub fn find_similar_layers(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
|
||||
self.db.search(SearchQuery {
|
||||
vector: query.to_vec(),
|
||||
k,
|
||||
filter: None,
|
||||
ef_search: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Find similar layers with a filter on metadata fields.
|
||||
///
|
||||
/// Only results where every `(key, value)` in `filter` matches are returned.
|
||||
pub fn find_similar_layers_filtered(
|
||||
&self,
|
||||
query: &[f32],
|
||||
k: usize,
|
||||
filter: HashMap<String, serde_json::Value>,
|
||||
) -> Result<Vec<SearchResult>> {
|
||||
self.db.search(SearchQuery {
|
||||
vector: query.to_vec(),
|
||||
k,
|
||||
filter: Some(filter),
|
||||
ef_search: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Delete a layer embedding by ID.
|
||||
pub fn delete_layer(&self, layer_id: &str) -> Result<bool> {
|
||||
self.db.delete(layer_id)
|
||||
}
|
||||
|
||||
/// Total number of stored layer embeddings.
|
||||
pub fn len(&self) -> Result<usize> {
|
||||
self.db.len()
|
||||
}
|
||||
|
||||
/// Returns `true` if no embeddings have been stored yet.
|
||||
pub fn is_empty(&self) -> Result<bool> {
|
||||
self.db.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
// ── sparc / semantic file search integration ────────────────────────────────
|
||||
|
||||
/// A file-path entry as indexed by [`SemanticSearchAdapter`].
///
/// This is a plain data record; it derives `PartialEq`/`Eq` so entries can be
/// compared directly (e.g. in assertions or when de-duplicating).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileEntry {
    /// Absolute or relative path to the source file.
    pub path: String,
    /// Brief human-readable description of the file's contents.
    pub description: String,
    /// The embedding dimension used to index this file.
    pub dimensions: usize,
}
|
||||
|
||||
/// Adapter for sparc-style semantic file search.
|
||||
///
|
||||
/// sparc needs to locate relevant source files given a natural-language query
|
||||
/// string. This adapter stores one embedding per file (derived externally,
|
||||
/// e.g. from an ONNX all-MiniLM model) and retrieves the closest matches
|
||||
/// using HNSW approximate nearest-neighbour search.
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use ruvector_core::integration::SemanticSearchAdapter;
|
||||
///
|
||||
/// let mut adapter = SemanticSearchAdapter::new(384, "./sparc_index.db").unwrap();
|
||||
///
|
||||
/// // Index source files (embeddings produced by your embedding pipeline)
|
||||
/// adapter.index_file("src/auth/service.rs", "authentication service", &[0.0f32; 384]).unwrap();
|
||||
/// adapter.index_file("src/user/model.rs", "user data model", &[0.1f32; 384]).unwrap();
|
||||
///
|
||||
/// // Query with a natural-language description
|
||||
/// let results = adapter.search("jwt token validation", &[0.05f32; 384], 5).unwrap();
|
||||
/// for r in results {
|
||||
/// println!(" {} (score={:.4})", r.id, r.score);
|
||||
/// }
|
||||
/// ```
|
||||
pub struct SemanticSearchAdapter {
|
||||
db: VectorDB,
|
||||
dimensions: usize,
|
||||
}
|
||||
|
||||
impl SemanticSearchAdapter {
|
||||
/// Create a new adapter.
|
||||
///
|
||||
/// `dimensions` is the embedding dimension of your model (e.g. 384 for
|
||||
/// all-MiniLM-L6-v2, 768 for BERT-base).
|
||||
pub fn new(dimensions: usize, storage_path: impl Into<String>) -> Result<Self> {
|
||||
let options = DbOptions {
|
||||
dimensions,
|
||||
distance_metric: DistanceMetric::Cosine,
|
||||
storage_path: storage_path.into(),
|
||||
hnsw_config: Some(HnswConfig {
|
||||
m: 16,
|
||||
ef_construction: 100,
|
||||
ef_search: 100,
|
||||
max_elements: 500_000,
|
||||
}),
|
||||
quantization: None,
|
||||
};
|
||||
Ok(Self {
|
||||
db: VectorDB::new(options)?,
|
||||
dimensions,
|
||||
})
|
||||
}
|
||||
|
||||
/// Index a source file.
|
||||
///
|
||||
/// The file `path` is used as the vector ID so look-ups are O(1).
|
||||
/// `description` is stored in metadata for debugging / display.
|
||||
/// `embedding` must have the same length as the adapter's `dimensions`.
|
||||
pub fn index_file(
|
||||
&self,
|
||||
path: impl Into<String>,
|
||||
description: impl Into<String>,
|
||||
embedding: &[f32],
|
||||
) -> Result<String> {
|
||||
let path_str = path.into();
|
||||
if embedding.len() != self.dimensions {
|
||||
return Err(RuvectorError::DimensionMismatch {
|
||||
expected: self.dimensions,
|
||||
actual: embedding.len(),
|
||||
});
|
||||
}
|
||||
|
||||
let mut metadata = HashMap::new();
|
||||
metadata.insert(
|
||||
"description".to_string(),
|
||||
serde_json::Value::String(description.into()),
|
||||
);
|
||||
metadata.insert(
|
||||
"path".to_string(),
|
||||
serde_json::Value::String(path_str.clone()),
|
||||
);
|
||||
|
||||
self.db.insert(VectorEntry {
|
||||
id: Some(path_str),
|
||||
vector: embedding.to_vec(),
|
||||
metadata: Some(metadata),
|
||||
})
|
||||
}
|
||||
|
||||
/// Remove a previously indexed file.
|
||||
pub fn remove_file(&self, path: &str) -> Result<bool> {
|
||||
self.db.delete(path)
|
||||
}
|
||||
|
||||
/// Search for source files semantically related to `query_embedding`.
|
||||
///
|
||||
/// Returns up to `k` results sorted by ascending cosine distance
|
||||
/// (most relevant first). Each [`SearchResult`] has `.id` set to the
|
||||
/// file path and `.metadata` containing the description.
|
||||
pub fn search(
|
||||
&self,
|
||||
_query_text: &str,
|
||||
query_embedding: &[f32],
|
||||
k: usize,
|
||||
) -> Result<Vec<SearchResult>> {
|
||||
if query_embedding.len() != self.dimensions {
|
||||
return Err(RuvectorError::DimensionMismatch {
|
||||
expected: self.dimensions,
|
||||
actual: query_embedding.len(),
|
||||
});
|
||||
}
|
||||
self.db.search(SearchQuery {
|
||||
vector: query_embedding.to_vec(),
|
||||
k,
|
||||
filter: None,
|
||||
ef_search: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Total number of indexed files.
|
||||
pub fn len(&self) -> Result<usize> {
|
||||
self.db.len()
|
||||
}
|
||||
|
||||
/// Returns `true` if no files have been indexed yet.
|
||||
pub fn is_empty(&self) -> Result<bool> {
|
||||
self.db.is_empty()
|
||||
}
|
||||
|
||||
/// List all indexed file paths.
|
||||
pub fn list_files(&self) -> Result<Vec<String>> {
|
||||
self.db.keys()
|
||||
}
|
||||
}
|
||||
|
||||
// ── Shared utility ──────────────────────────────────────────────────────────
|
||||
|
||||
/// Scale `v` to unit Euclidean length, for use with cosine-distance workloads.
///
/// When the squared norm of `v` is below `f32::EPSILON` (which includes the
/// empty slice and all-zero vectors) a plain copy of the input is returned
/// instead, so no division by zero can occur.
#[inline]
pub fn normalize(v: &[f32]) -> Vec<f32> {
    let mut squared_len = 0.0f32;
    for &component in v {
        squared_len += component * component;
    }

    // Degenerate input: hand back the components untouched.
    if squared_len < f32::EPSILON {
        return v.to_vec();
    }

    let len = squared_len.sqrt();
    let mut unit = Vec::with_capacity(v.len());
    for &component in v {
        unit.push(component / len);
    }
    unit
}
|
||||
|
||||
/// Compute the cosine similarity in [−1, 1] between two vectors.
///
/// Both inputs are treated as raw (un-normalised) vectors. The quotient is
/// clamped to `[-1.0, 1.0]` so that floating-point rounding can never push
/// the result marginally outside the mathematical range (e.g. `1.0000001`
/// for two identical vectors).
///
/// Returns `0.0` when the product of the two norms does not exceed
/// `f32::EPSILON`, which covers empty slices and zero (or near-zero) vectors.
///
/// In debug builds a length mismatch triggers an assertion; in release builds
/// only the common prefix of the two slices is considered.
#[inline]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len(), "cosine_similarity: length mismatch");

    // Single pass accumulating the dot product and both squared norms.
    let (dot, norm_a, norm_b) = a.iter().zip(b).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(dot, norm_a, norm_b), (&ai, &bi)| (dot + ai * bi, norm_a + ai * ai, norm_b + bi * bi),
    );

    let denom = norm_a.sqrt() * norm_b.sqrt();
    if denom > f32::EPSILON {
        (dot / denom).clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// `normalize` must produce a vector of Euclidean length 1 for non-degenerate input.
    #[test]
    fn test_normalize_unit_vector() {
        let unit = normalize(&[3.0f32, 4.0]);
        let squared: f32 = unit.iter().map(|x| x * x).sum();
        let norm = squared.sqrt();
        assert!(
            (norm - 1.0).abs() < 1e-6,
            "Expected unit norm, got {}",
            norm
        );
    }

    /// The all-zero vector cannot be normalised and must come back untouched.
    #[test]
    fn test_normalize_zero_vector() {
        let zeros = vec![0.0f32, 0.0, 0.0];
        assert_eq!(
            normalize(&zeros),
            zeros,
            "Zero vector should be returned unchanged"
        );
    }

    /// A vector compared with itself has similarity 1.
    #[test]
    fn test_cosine_similarity_identical() {
        let v = [1.0f32, 2.0, 3.0];
        let sim = cosine_similarity(&v, &v);
        assert!(
            (sim - 1.0).abs() < 1e-5,
            "Identical vectors: expected 1.0, got {}",
            sim
        );
    }

    /// Perpendicular vectors have similarity 0.
    #[test]
    fn test_cosine_similarity_orthogonal() {
        let sim = cosine_similarity(&[1.0f32, 0.0], &[0.0f32, 1.0]);
        assert!(
            sim.abs() < 1e-5,
            "Orthogonal vectors: expected 0.0, got {}",
            sim
        );
    }

    /// Index three files, then query with the first file's embedding and
    /// expect that file to be the top hit.
    #[test]
    fn test_semantic_search_adapter_roundtrip() {
        let workdir = tempdir().unwrap();
        let db_path = workdir
            .path()
            .join("sparc.db")
            .to_string_lossy()
            .to_string();
        let adapter = SemanticSearchAdapter::new(4, db_path).unwrap();

        let emb_a = normalize(&[1.0, 0.0, 0.0, 0.0]);
        let emb_b = normalize(&[0.0, 1.0, 0.0, 0.0]);
        let emb_c = normalize(&[0.0, 0.0, 1.0, 0.0]);

        // hnsw_rs requires at least 2 elements before searching.
        let files = [
            ("src/auth.rs", "authentication", &emb_a),
            ("src/user.rs", "user model", &emb_b),
            ("src/storage.rs", "storage layer", &emb_c),
        ];
        for (file, summary, embedding) in files {
            adapter.index_file(file, summary, embedding).unwrap();
        }

        assert_eq!(adapter.len().unwrap(), 3);

        // The query equals emb_a, so src/auth.rs must rank first.
        let hits = adapter.search("auth", &emb_a, 2).unwrap();
        assert!(!hits.is_empty());
        assert_eq!(hits[0].id, "src/auth.rs");
    }

    /// Store three layer embeddings, then query with the first one and
    /// expect it to be returned as the single nearest layer.
    #[test]
    fn test_fann_adapter_store_and_retrieve() {
        let workdir = tempdir().unwrap();
        let db_path = workdir
            .path()
            .join("fann.db")
            .to_string_lossy()
            .to_string();
        let adapter = FannAdapter::new(4, db_path).unwrap();

        let layer_emb_0 = normalize(&[1.0, 1.0, 0.0, 0.0]);
        let layer_emb_1 = normalize(&[0.0, 0.0, 1.0, 1.0]);
        let layer_emb_2 = normalize(&[1.0, 0.0, 1.0, 0.0]);

        // hnsw_rs requires at least 2 elements before searching.
        let layers = [
            ("model_v1/layer_0", &layer_emb_0),
            ("model_v1/layer_1", &layer_emb_1),
            ("model_v1/layer_2", &layer_emb_2),
        ];
        for (layer_id, embedding) in layers {
            adapter.store_layer(layer_id, embedding, None).unwrap();
        }

        let hits = adapter.find_similar_layers(&layer_emb_0, 1).unwrap();
        assert!(!hits.is_empty());
        assert_eq!(hits[0].id, "model_v1/layer_0");
    }
}
|
||||
|
|
@ -73,6 +73,12 @@ pub mod memory;
|
|||
/// Advanced techniques: hypergraphs, learned indexes, neural hashing, TDA (Phase 6)
|
||||
pub mod advanced;
|
||||
|
||||
/// Cross-integration helpers for the ruvnet crate ecosystem.
|
||||
///
|
||||
/// Provides [`integration::FannAdapter`] for ruv-FANN layer-embedding storage
|
||||
/// and [`integration::SemanticSearchAdapter`] for sparc semantic file search.
|
||||
pub mod integration;
|
||||
|
||||
// Re-exports
|
||||
pub use advanced_features::{
|
||||
fuse_rankings, ConformalConfig, ConformalPredictor, EnhancedPQ, FilterExpression,
|
||||
|
|
|
|||
|
|
@ -87,10 +87,13 @@ pub struct HnswConfig {
|
|||
impl Default for HnswConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
m: 32,
|
||||
ef_construction: 200,
|
||||
m: 16,
|
||||
ef_construction: 100,
|
||||
ef_search: 100,
|
||||
max_elements: 10_000_000,
|
||||
// 1M is a reasonable default that avoids excessive upfront memory
|
||||
// allocation while still being suitable for production workloads.
|
||||
// Callers building large indexes should set this explicitly.
|
||||
max_elements: 1_000_000,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -493,3 +493,116 @@ fn test_hnsw_parallel_batch_insert() -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ── New tests covering correctness fixes ────────────────────────────────────
|
||||
|
||||
/// Searching a populated HNSW index with `k = 0` must return an empty
/// vector instead of panicking.
#[test]
fn test_hnsw_search_k_zero() -> Result<()> {
    let origin = vec![0.0f32; 32];

    let mut hnsw = HnswIndex::new(
        32,
        DistanceMetric::Euclidean,
        HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 50,
            max_elements: 1000,
        },
    )?;
    hnsw.add("v0".to_string(), origin.clone())?;

    let hits = hnsw.search(&origin, 0)?;
    assert!(hits.is_empty(), "k=0 must return empty results");

    Ok(())
}
|
||||
|
||||
/// HNSW search results must be ordered by non-decreasing score.
#[test]
fn test_hnsw_results_sorted_ascending() -> Result<()> {
    const DIMS: usize = 64;
    const COUNT: usize = 200;
    const TOP_K: usize = 20;

    let mut hnsw = HnswIndex::new(
        DIMS,
        DistanceMetric::Euclidean,
        HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 100,
            max_elements: 1000,
        },
    )?;

    let dataset = generate_random_vectors(COUNT, DIMS, 31415);
    for (pos, item) in dataset.iter().enumerate() {
        hnsw.add(format!("v{}", pos), item.clone())?;
    }

    // Use the first stored vector as the probe.
    let hits = hnsw.search(&dataset[0], TOP_K)?;

    assert!(!hits.is_empty());
    for pair in hits.windows(2) {
        let (current, next) = (&pair[0], &pair[1]);
        assert!(
            current.score <= next.score,
            "Results not sorted: score[n]={} > score[n+1]={}",
            current.score,
            next.score
        );
    }

    Ok(())
}
|
||||
|
||||
/// `set_ef_search` must be reflected in the configuration exposed by `config()`.
#[test]
fn test_hnsw_set_ef_search_updates_config() -> Result<()> {
    let initial = HnswConfig {
        m: 16,
        ef_construction: 100,
        ef_search: 50,
        max_elements: 1000,
    };
    let mut hnsw = HnswIndex::new(32, DistanceMetric::Cosine, initial)?;

    // Starts out with the value supplied at construction time.
    assert_eq!(hnsw.config().ef_search, 50);

    hnsw.set_ef_search(200);
    let updated = hnsw.config().ef_search;
    assert_eq!(updated, 200, "set_ef_search should update config.ef_search");

    Ok(())
}
|
||||
|
||||
/// A configured `ef_search` smaller than `k` must not prevent the search
/// from returning results (the effective ef is clamped up to `k`).
#[test]
fn test_hnsw_search_with_ef_clamps_to_k() -> Result<()> {
    const DIMS: usize = 32;
    const COUNT: usize = 100;
    const TOP_K: usize = 20;

    let mut hnsw = HnswIndex::new(
        DIMS,
        DistanceMetric::Euclidean,
        HnswConfig {
            m: 16,
            ef_construction: 100,
            ef_search: 5, // intentionally lower than k
            max_elements: 1000,
        },
    )?;

    let dataset = generate_random_vectors(COUNT, DIMS, 27182);
    for (pos, item) in dataset.iter().enumerate() {
        hnsw.add(format!("v{}", pos), item.clone())?;
    }

    // search() uses ef_search=5 internally, which is < k=20; results should
    // still be at least as many as the index can return (not zero).
    let hits = hnsw.search(&dataset[0], TOP_K)?;
    assert!(
        !hits.is_empty(),
        "search with ef_search < k must still return results"
    );

    Ok(())
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue