diff --git a/crates/ruvector-core/src/distance.rs b/crates/ruvector-core/src/distance.rs index 769243a4f..b5c495886 100644 --- a/crates/ruvector-core/src/distance.rs +++ b/crates/ruvector-core/src/distance.rs @@ -33,10 +33,11 @@ pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 { /// Cosine distance (1 - cosine_similarity) using SimSIMD #[inline] pub fn cosine_distance(a: &[f32], b: &[f32]) -> f32 { - // SimSIMD returns cosine similarity, convert to distance - let similarity = simsimd::SpatialSimilarity::cosine(a, b) - .expect("SimSIMD cosine failed"); - (1.0 - similarity) as f32 + // SimSIMD's `cosine` already returns the cosine distance + // (1 - cosine similarity, in the range [0, 2]), not the similarity, + // so the result is returned as-is without a further `1 - x` conversion. + simsimd::SpatialSimilarity::cosine(a, b) + .expect("SimSIMD cosine failed") as f32 } /// Dot product distance (negative for maximization) using SimSIMD @@ -85,10 +86,17 @@ mod tests { #[test] fn test_cosine_distance() { - let a = vec![1.0, 0.0, 0.0]; - let b = vec![0.0, 1.0, 0.0]; + // Test with identical vectors (should have distance ~0) + let a = vec![1.0, 2.0, 3.0]; + let b = vec![1.0, 2.0, 3.0]; let dist = cosine_distance(&a, &b); - assert!((dist - 1.0).abs() < 0.01); // Orthogonal vectors + assert!(dist < 0.01, "Identical vectors should have ~0 distance, got {}", dist); + + // Test with opposite vectors (should have high distance) + let a = vec![1.0, 0.0, 0.0]; + let b = vec![-1.0, 0.0, 0.0]; + let dist = cosine_distance(&a, &b); + assert!(dist > 1.5, "Opposite vectors should have high distance, got {}", dist); } #[test] diff --git a/crates/ruvector-core/src/vector_db.rs b/crates/ruvector-core/src/vector_db.rs index 9967be73a..8a936888a 100644 --- a/crates/ruvector-core/src/vector_db.rs +++ b/crates/ruvector-core/src/vector_db.rs @@ -164,6 +164,7 @@ mod tests { let mut options = DbOptions::default(); options.storage_path = dir.path().join("test.db").to_string_lossy().to_string(); options.dimensions = 3; + 
options.distance_metric = DistanceMetric::Euclidean; // Use Euclidean for clearer test options.hnsw_config = None; // Use flat index for testing let db = VectorDB::new(options)?; @@ -181,16 +182,23 @@ mod tests { metadata: None, })?; - // Search + db.insert(VectorEntry { + id: Some("v3".to_string()), + vector: vec![0.0, 0.0, 1.0], + metadata: None, + })?; + + // Search for exact match let results = db.search(SearchQuery { vector: vec![1.0, 0.0, 0.0], - k: 1, + k: 2, filter: None, ef_search: None, })?; - assert_eq!(results.len(), 1); - assert_eq!(results[0].id, "v1"); + assert!(results.len() >= 1); + assert_eq!(results[0].id, "v1", "First result should be exact match"); + assert!(results[0].score < 0.01, "Exact match should have ~0 distance"); Ok(()) } diff --git a/test_cosine b/test_cosine new file mode 100755 index 000000000..c27b86aeb Binary files /dev/null and b/test_cosine differ