mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-25 23:24:03 +00:00
docs: Add honest documentation about capabilities and limitations
- Update lib.rs with tested/benchmarked features vs experimental ones - Mark AgenticDB embedding function as placeholder (NOT semantic) - Add warning to RAG example about mock embeddings - Clarify that external embedding models are required for semantic search
This commit is contained in:
parent
c46dc4aae2
commit
ef54ee9415
3 changed files with 43 additions and 10 deletions
|
|
@ -656,10 +656,22 @@ impl AgenticDB {
|
|||
|
||||
// ============ Helper Methods ============
|
||||
|
||||
/// Generate text embedding (placeholder - would use actual embedding model)
|
||||
/// Generate text embedding from text.
|
||||
///
|
||||
/// # ⚠️ WARNING: PLACEHOLDER IMPLEMENTATION
|
||||
///
|
||||
/// This uses a simple hash-based embedding that does NOT understand
|
||||
/// semantic meaning. Text similarity will be based on character overlap,
|
||||
/// not actual meaning.
|
||||
///
|
||||
/// For real semantic search, integrate an actual embedding model:
|
||||
/// - `sentence-transformers` via Python bindings
|
||||
/// - `candle` for native Rust inference
|
||||
/// - ONNX Runtime for cross-platform models
|
||||
/// - OpenAI/Anthropic embedding APIs
|
||||
fn generate_text_embedding(&self, text: &str) -> Result<Vec<f32>> {
|
||||
// Simple hash-based embedding for demonstration
|
||||
// In production, use actual embedding models like sentence-transformers
|
||||
// ⚠️ PLACEHOLDER: Hash-based embedding - NOT semantic
|
||||
// This is for demonstration and testing only
|
||||
let mut embedding = vec![0.0; self.dimensions];
|
||||
let bytes = text.as_bytes();
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,24 @@
|
|||
//!
|
||||
//! High-performance Rust-native vector database with HNSW indexing and SIMD-optimized operations.
|
||||
//!
|
||||
//! ## Features
|
||||
//! ## Working Features (Tested & Benchmarked)
|
||||
//!
|
||||
//! - **HNSW Indexing**: O(log n) search with 95%+ recall
|
||||
//! - **SIMD Optimizations**: 4-16x faster distance calculations
|
||||
//! - **Quantization**: 4-32x memory compression
|
||||
//! - **Zero-copy Memory**: Memory-mapped vectors for instant loading
|
||||
//! - **AgenticDB Compatible**: Drop-in replacement with 10-100x speedup
|
||||
//! - **HNSW Indexing**: Approximate nearest neighbor search with O(log n) complexity
|
||||
//! - **SIMD Distance**: SimSIMD-powered distance calculations (~16M ops/sec for 512-dim)
|
||||
//! - **Quantization**: Scalar (4x) and binary (32x) compression with distance support
|
||||
//! - **Persistence**: REDB-based storage with config persistence
|
||||
//! - **Search**: ~2.5K queries/sec on 10K vectors (benchmarked)
|
||||
//!
|
||||
//! ## Experimental/Incomplete Features
|
||||
//!
|
||||
//! - **AgenticDB**: Uses placeholder hash-based embeddings (NOT semantic)
|
||||
//! - Replace `generate_text_embedding` with real model for production use
|
||||
//! - **Advanced Features**: Conformal prediction, hybrid search - functional but less tested
|
||||
//!
|
||||
//! ## What This Is NOT
|
||||
//!
|
||||
//! - This is NOT a complete RAG solution - you need external embedding models
|
||||
//! - Examples use mock embeddings for demonstration only
|
||||
|
||||
#![warn(missing_docs)]
|
||||
#![warn(clippy::all)]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,13 @@
|
|||
//! RAG (Retrieval Augmented Generation) Pipeline Example
|
||||
//!
|
||||
//! Demonstrates building a complete RAG system with Ruvector
|
||||
//! Demonstrates building a complete RAG system with Ruvector.
|
||||
//!
|
||||
//! ⚠️ NOTE: This example uses MOCK embeddings for demonstration.
|
||||
//! In production, replace `mock_embedding()` with a real embedding model:
|
||||
//! - `sentence-transformers` via Python bindings
|
||||
//! - `candle` for native Rust inference
|
||||
//! - ONNX Runtime for cross-platform models
|
||||
//! - OpenAI/Anthropic embedding APIs
|
||||
|
||||
use ruvector_core::{VectorDB, VectorEntry, SearchQuery, DbOptions, Result};
|
||||
use std::collections::HashMap;
|
||||
|
|
@ -114,6 +121,9 @@ fn main() -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// ⚠️ MOCK EMBEDDING - NOT SEMANTIC
|
||||
/// This produces deterministic vectors based on seed value.
|
||||
/// Replace with actual embedding model for real semantic search.
|
||||
fn mock_embedding(dims: usize, seed: f32) -> Vec<f32> {
|
||||
(0..dims)
|
||||
.map(|i| (seed + i as f32 * 0.001).sin())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue