ruvector/examples/graph/hybrid_search.rs
Claude f3f7a95752 feat: Add Neo4j-compatible hypergraph database package (ruvector-graph)
Major new package implementing a distributed hypergraph database with:

## Core Components (crates/ruvector-graph/)
- Cypher-compatible query parser with lexer, AST, optimizer
- Query execution engine with SIMD optimization and parallel execution
- ACID transaction support with MVCC isolation levels
- Distributed consensus and federation layer
- Vector-graph hybrid queries for AI/RAG workloads
- Performance optimizations (100x faster than Neo4j target)

## Bindings
- WASM bindings (crates/ruvector-graph-wasm/)
- NAPI-RS Node.js bindings (crates/ruvector-graph-node/)
- NPM packages for both targets

## CLI Integration
- 8 new graph commands: create, query, shell, import, export, info, benchmark, serve

## CI/CD
- Updated build-native.yml for graph packages
- New graph-ci.yml for testing and benchmarks
- New graph-release.yml for automated publishing

## Data Generation
- OpenRouter/Kimi K2 integration (packages/graph-data-generator/)
- Agentic-synth benchmark suite integration

## Tests & Benchmarks
- 11 test files covering all components
- Criterion benchmarks for performance validation
- Neo4j compatibility test suite

## Architecture Highlights
- CSR graph layout for cache-friendly access
- SIMD-vectorized query operators
- Roaring bitmaps for label indexes
- Bloom filters for fast negative lookups
- Adaptive radix tree for property indexes

Note: This is a comprehensive implementation created by 15 parallel agents.
Some integration fixes may be needed to resolve cross-module dependencies.

Co-authored-by: Claude AI Swarm <swarm@claude.ai>
2025-11-25 23:11:54 +00:00

161 lines
5.8 KiB
Rust

//! Hybrid Vector-Graph Search Example
//!
//! This example demonstrates combining vector similarity search
//! with graph traversal for powerful hybrid queries:
//! - Vector embeddings on graph nodes
//! - Semantic similarity search
//! - Graph-constrained vector search
//! - Ranking by combined scores
fn main() {
println!("=== RuVector Graph - Hybrid Search ===\n");
// TODO: Once the graph and vector APIs are integrated, implement:
println!("1. Initialize Hybrid Database");
// let db = GraphDatabase::open("./data/hybrid_db.db")?;
// let vector_store = db.enable_vector_index("embeddings", 768)?;
println!(" ✓ Created graph database with vector indexing");
println!("\n2. Create Nodes with Vector Embeddings");
// Create documents with text embeddings
// let doc1 = db.create_node()
// .label("Document")
// .property("title", "Introduction to Rust")
// .property("content", "Rust is a systems programming language...")
// .embedding(embedding_model.encode("Introduction to Rust..."))
// .execute()?;
// let doc2 = db.create_node()
// .label("Document")
// .property("title", "Advanced Rust Patterns")
// .property("content", "This article covers advanced patterns...")
// .embedding(embedding_model.encode("Advanced Rust Patterns..."))
// .execute()?;
println!(" ✓ Created documents with vector embeddings");
println!("\n3. Create Relationships");
// Create citation relationships
// db.create_relationship()
// .from(doc2)
// .to(doc1)
// .type("CITES")
// .execute()?;
println!(" ✓ Created citation relationships");
println!("\n4. Semantic Search (Vector Only)");
// let query = "memory safety in programming";
// let query_embedding = embedding_model.encode(query);
// let results = db.vector_search()
// .index("embeddings")
// .query(query_embedding)
// .top_k(5)
// .execute()?;
// println!(" Query: '{}'", query);
// for (doc, score) in results {
// println!(" - {} (score: {:.4})", doc.property("title"), score);
// }
println!("\n5. Graph-Constrained Vector Search");
// Find similar documents that are cited by "Advanced Rust Patterns"
// let results = db.hybrid_query()
// .cypher(r#"
// MATCH (source:Document {title: 'Advanced Rust Patterns'})-[:CITES]->(cited:Document)
// RETURN cited
// "#)
// .vector_similarity(query_embedding, 0.7) // minimum similarity threshold
// .execute()?;
// println!(" Cited documents similar to query:");
// for doc in results {
// println!(" - {}", doc.property("title"));
// }
println!("\n6. Ranked Hybrid Search");
// Combine vector similarity and graph metrics
// let results = db.hybrid_query()
// .vector_search(query_embedding)
// .graph_metric("pagerank")
// .combine_scores(|vector_score, graph_score| {
// 0.7 * vector_score + 0.3 * graph_score
// })
// .top_k(10)
// .execute()?;
// println!(" Top results (vector + graph ranking):");
// for (doc, combined_score, details) in results {
// println!(" - {} (score: {:.4}, vector: {:.4}, graph: {:.4})",
// doc.property("title"),
// combined_score,
// details.vector_score,
// details.graph_score
// );
// }
println!("\n7. Multi-Hop Vector Search");
// Find documents connected through multiple hops, ranked by similarity
// let results = db.hybrid_query()
// .cypher(r#"
// MATCH path = (start:Document {title: 'Introduction to Rust'})
// -[:CITES|:RELATED_TO*1..3]->
// (end:Document)
// RETURN end, length(path) as distance
// "#)
// .vector_similarity(query_embedding, 0.6)
// .penalty_per_hop(0.1) // Reduce score for each hop
// .execute()?;
// println!(" Multi-hop search results:");
// for (doc, score, distance) in results {
// println!(" - {} (score: {:.4}, hops: {})",
// doc.property("title"), score, distance
// );
// }
println!("\n8. Filtered Vector Search with Graph Constraints");
// Complex hybrid query with multiple constraints
// let results = db.hybrid_query()
// .cypher(r#"
// MATCH (doc:Document)-[:AUTHORED_BY]->(author:Person)
// WHERE author.institution = 'MIT'
// AND doc.year >= 2020
// RETURN doc
// "#)
// .vector_search(query_embedding)
// .top_k(5)
// .execute()?;
// println!(" MIT papers since 2020 similar to query:");
// for (doc, score) in results {
// println!(" - {} (score: {:.4})", doc.property("title"), score);
// }
println!("\n9. Performance Comparison");
// Compare pure vector vs hybrid search
// let start = std::time::Instant::now();
// let vector_results = db.vector_search()
// .query(query_embedding)
// .top_k(100)
// .execute()?;
// let vector_time = start.elapsed();
// let start = std::time::Instant::now();
// let hybrid_results = db.hybrid_query()
// .vector_search(query_embedding)
// .graph_filter(/* some constraint */)
// .top_k(100)
// .execute()?;
// let hybrid_time = start.elapsed();
// println!(" Vector-only: {:?}", vector_time);
// println!(" Hybrid: {:?}", hybrid_time);
// println!(" Overhead: {:.2}%", (hybrid_time.as_secs_f64() / vector_time.as_secs_f64() - 1.0) * 100.0);
println!("\n=== Example Complete ===");
println!("\nNote: This is a template. Actual implementation pending graph API exposure.");
}