diff --git a/crates/ruvector-postgres/docs/integration-plans/01-self-learning.md b/crates/ruvector-postgres/docs/integration-plans/01-self-learning.md new file mode 100644 index 00000000..3823fafd --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/01-self-learning.md @@ -0,0 +1,394 @@ +# Self-Learning / ReasoningBank Integration Plan + +## Overview + +Integrate adaptive learning capabilities into ruvector-postgres, enabling the database to learn from query patterns, optimize search strategies, and improve recall/precision over time. + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ PostgreSQL Extension │ +├─────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │ +│ │ Trajectory │ │ Verdict │ │ Memory Distillation│ │ +│ │ Tracker │ │ Judgment │ │ Engine │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────────┬──────────┘ │ +│ │ │ │ │ +│ └────────────────┼─────────────────────┘ │ +│ ▼ │ +│ ┌───────────────────────┐ │ +│ │ ReasoningBank │ │ +│ │ (Pattern Storage) │ │ +│ └───────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Module Structure + +``` +src/ +├── learning/ +│ ├── mod.rs # Module exports +│ ├── trajectory.rs # Query trajectory tracking +│ ├── verdict.rs # Success/failure judgment +│ ├── distillation.rs # Pattern extraction +│ ├── reasoning_bank.rs # Pattern storage & retrieval +│ └── optimizer.rs # Search parameter optimization +``` + +## SQL Interface + +### Configuration + +```sql +-- Enable self-learning for a table +SELECT ruvector_enable_learning('embeddings', + trajectory_window := 1000, + learning_rate := 0.01, + min_samples := 100 +); + +-- View learning statistics +SELECT * FROM ruvector_learning_stats('embeddings'); + +-- Export learned patterns +SELECT ruvector_export_patterns('embeddings') AS patterns_json; + +-- Import patterns from another instance +SELECT 
ruvector_import_patterns('embeddings', patterns_json); +``` + +### Automatic Optimization + +```sql +-- Auto-tune HNSW parameters based on query patterns +SELECT ruvector_auto_tune('embeddings_idx', + optimize_for := 'recall', -- or 'latency', 'balanced' + sample_queries := 1000 +); + +-- Get recommended index parameters +SELECT * FROM ruvector_recommend_params('embeddings'); +``` + +## Implementation Phases + +### Phase 1: Trajectory Tracking (Week 1-2) + +```rust +// src/learning/trajectory.rs + +pub struct QueryTrajectory { + pub query_id: Uuid, + pub query_vector: Vec, + pub timestamp: DateTime, + pub index_params: IndexParams, + pub results: Vec, + pub latency_ms: f64, + pub recall_estimate: Option, +} + +pub struct TrajectoryTracker { + buffer: RingBuffer, + storage: TrajectoryStorage, +} + +impl TrajectoryTracker { + pub fn record(&mut self, trajectory: QueryTrajectory); + pub fn get_recent(&self, n: usize) -> Vec<&QueryTrajectory>; + pub fn analyze_patterns(&self) -> PatternAnalysis; +} +``` + +**SQL Functions:** +```sql +-- Record query feedback (user indicates relevance) +SELECT ruvector_record_feedback( + query_id := 'abc123', + relevant_ids := ARRAY[1, 5, 7], + irrelevant_ids := ARRAY[2, 3] +); +``` + +### Phase 2: Verdict Judgment (Week 3-4) + +```rust +// src/learning/verdict.rs + +pub struct VerdictEngine { + success_threshold: f32, + metrics: VerdictMetrics, +} + +impl VerdictEngine { + /// Judge if a search was successful based on multiple signals + pub fn judge(&self, trajectory: &QueryTrajectory) -> Verdict { + let signals = vec![ + self.latency_score(trajectory), + self.recall_score(trajectory), + self.diversity_score(trajectory), + self.user_feedback_score(trajectory), + ]; + + Verdict { + success: signals.iter().sum::() / signals.len() as f32 > self.success_threshold, + confidence: self.compute_confidence(&signals), + recommendations: self.generate_recommendations(&signals), + } + } +} +``` + +### Phase 3: Memory Distillation (Week 5-6) + 
+```rust +// src/learning/distillation.rs + +pub struct DistillationEngine { + pattern_extractor: PatternExtractor, + compressor: PatternCompressor, +} + +impl DistillationEngine { + /// Extract reusable patterns from trajectories + pub fn distill(&self, trajectories: &[QueryTrajectory]) -> Vec { + let raw_patterns = self.pattern_extractor.extract(trajectories); + let compressed = self.compressor.compress(raw_patterns); + compressed + } +} + +pub struct LearnedPattern { + pub query_cluster_centroid: Vec, + pub optimal_ef_search: u32, + pub optimal_probes: u32, + pub expected_recall: f32, + pub confidence: f32, +} +``` + +### Phase 4: ReasoningBank Storage (Week 7-8) + +```rust +// src/learning/reasoning_bank.rs + +pub struct ReasoningBank { + patterns: HnswIndex, + metadata: HashMap, +} + +impl ReasoningBank { + /// Find applicable patterns for a query + pub fn lookup(&self, query: &[f32], k: usize) -> Vec<&LearnedPattern> { + self.patterns.search(query, k) + } + + /// Store a new pattern + pub fn store(&mut self, pattern: LearnedPattern) -> PatternId; + + /// Merge similar patterns to prevent bloat + pub fn consolidate(&mut self); + + /// Prune low-value patterns + pub fn prune(&mut self, min_usage: u32, min_confidence: f32); +} +``` + +### Phase 5: Search Optimizer (Week 9-10) + +```rust +// src/learning/optimizer.rs + +pub struct SearchOptimizer { + reasoning_bank: Arc, + default_params: SearchParams, +} + +impl SearchOptimizer { + /// Get optimized parameters for a query + pub fn optimize(&self, query: &[f32]) -> SearchParams { + match self.reasoning_bank.lookup(query, 3) { + patterns if !patterns.is_empty() => { + self.interpolate_params(query, patterns) + } + _ => self.default_params.clone() + } + } + + fn interpolate_params(&self, query: &[f32], patterns: &[&LearnedPattern]) -> SearchParams { + // Weight patterns by similarity to query + let weights: Vec = patterns.iter() + .map(|p| cosine_similarity(query, &p.query_cluster_centroid)) + .collect(); + + 
SearchParams { + ef_search: weighted_average( + patterns.iter().map(|p| p.optimal_ef_search as f32), + &weights + ) as u32, + // ... + } + } +} +``` + +## PostgreSQL Integration + +### Background Worker + +```rust +// src/learning/bgworker.rs + +#[pg_guard] +pub extern "C" fn learning_bgworker_main(_arg: pg_sys::Datum) { + BackgroundWorker::attach_signal_handlers(SignalWakeFlags::SIGHUP | SignalWakeFlags::SIGTERM); + + loop { + // Process the trajectory buffer only once a full batch has accumulated, + // so that a short batch is not drained and silently discarded + if TRAJECTORY_BUFFER.len() >= MIN_BATCH_SIZE { + let trajectories = TRAJECTORY_BUFFER.drain(); + // Distill patterns + let patterns = DISTILLATION_ENGINE.distill(&trajectories); + + // Store in reasoning bank + for pattern in patterns { + REASONING_BANK.store(pattern); + } + + // Periodic consolidation + if should_consolidate() { + REASONING_BANK.consolidate(); + } + } + + // Sleep until next batch; wait_latch returns false once SIGTERM is received + if !BackgroundWorker::wait_latch(Some(std::time::Duration::from_millis(LEARNING_INTERVAL_MS))) { break; } + } +} +``` + +### GUC Configuration + +```rust +static LEARNING_ENABLED: GucSetting<bool> = GucSetting::<bool>::new(false); +static LEARNING_RATE: GucSetting<f64> = GucSetting::<f64>::new(0.01); +static TRAJECTORY_BUFFER_SIZE: GucSetting<i32> = GucSetting::<i32>::new(10000); +static PATTERN_CONSOLIDATION_INTERVAL: GucSetting<i32> = GucSetting::<i32>::new(3600); +``` + +## Optimization Strategies + +### 1. Adaptive ef_search + +```sql +-- Before: Static ef_search +SET ruvector.ef_search = 40; +SELECT * FROM items ORDER BY embedding <-> query_vec LIMIT 10; + +-- After: Adaptive ef_search based on learned patterns (enabled per session via a GUC) +SET ruvector.adaptive_search = on; +SELECT * FROM items +ORDER BY embedding <-> query_vec +LIMIT 10; +``` + +### 2. Query-Aware Probing + +For IVFFlat, learn optimal probe counts per query cluster: + +```rust +pub fn adaptive_probes(&self, query: &[f32]) -> u32 { + let cluster_id = self.assign_cluster(query); + self.learned_probes.get(&cluster_id).copied().unwrap_or(self.default_probes) +} +``` + +### 3. 
Index Selection + +Learn when to use HNSW vs IVFFlat: + +```rust +pub fn select_index(&self, query: &[f32], k: usize) -> IndexType { + let features = QueryFeatures::extract(query, k); + self.index_selector.predict(&features) +} +``` + +## Benchmarks + +### Metrics to Track + +| Metric | Baseline | Target | Measurement | +|--------|----------|--------|-------------| +| Recall@10 | 0.95 | 0.98 | After 10K queries | +| p99 Latency | 5ms | 3ms | After learning | +| Memory Overhead | 0 | <100MB | Pattern storage | +| Learning Time | N/A | <1s/1K queries | Background processing | + +### Benchmark Queries + +```sql +-- Measure recall improvement +SELECT ruvector_benchmark_recall( + table_name := 'embeddings', + ground_truth_table := 'embeddings_ground_truth', + num_queries := 1000, + k := 10 +); + +-- Measure latency improvement +SELECT ruvector_benchmark_latency( + table_name := 'embeddings', + num_queries := 10000, + k := 10, + percentiles := ARRAY[50, 90, 99] +); +``` + +## Dependencies + +```toml +[dependencies] +# Existing ruvector crates (optional integration) +# ruvector-core = { path = "../ruvector-core", optional = true } + +# Pattern storage +dashmap = "6.0" +parking_lot = "0.12" + +# Statistics +statrs = "0.16" + +# Clustering for pattern extraction (optional: enabled by the `learning-advanced` feature) +linfa = { version = "0.7", optional = true } +linfa-clustering = { version = "0.7", optional = true } + +# Serialization for pattern export/import +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +``` + +## Feature Flags + +```toml +[features] +learning = [] +learning-advanced = ["learning", "linfa", "linfa-clustering"] +learning-distributed = ["learning", "ruvector-replication"] # requires `ruvector-replication` to be declared as an optional dependency +``` + +## Migration Path + +1. **v0.2.0**: Basic trajectory tracking, manual feedback +2. **v0.3.0**: Verdict judgment, automatic pattern extraction +3. **v0.4.0**: Full ReasoningBank, adaptive search +4. 
**v0.5.0**: Distributed learning across replicas + +## Security Considerations + +- Pattern data is stored locally, no external transmission +- Trajectory data can be anonymized (hash query vectors) +- Learning can be disabled per-table for sensitive data +- Export/import requires superuser privileges diff --git a/crates/ruvector-postgres/docs/integration-plans/02-attention-mechanisms.md b/crates/ruvector-postgres/docs/integration-plans/02-attention-mechanisms.md new file mode 100644 index 00000000..43c838c3 --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/02-attention-mechanisms.md @@ -0,0 +1,545 @@ +# Attention Mechanisms Integration Plan + +## Overview + +Integrate 39 attention mechanisms from `ruvector-attention` into PostgreSQL, enabling attention-weighted vector search, transformer-style queries, and neural reranking directly in SQL. + +## Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ PostgreSQL Extension │ +├──────────────────────────────────────────────────────────────────┤ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Attention Registry │ │ +│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────────────┐ │ │ +│ │ │ Flash │ │ Linear │ │ MoE │ │ Hyperbolic │ │ │ +│ │ └────┬────┘ └────┬────┘ └────┬────┘ └────────┬────────┘ │ │ +│ └───────┼───────────┼───────────┼───────────────┼──────────┘ │ +│ └───────────┴───────────┴───────────────┘ │ +│ ▼ │ +│ ┌───────────────────────────┐ │ +│ │ SIMD-Accelerated Core │ │ +│ │ (AVX-512/AVX2/NEON) │ │ +│ └───────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────┘ +``` + +## Module Structure + +``` +src/ +├── attention/ +│ ├── mod.rs # Module exports & registry +│ ├── core/ +│ │ ├── scaled_dot.rs # Scaled dot-product attention +│ │ ├── multi_head.rs # Multi-head attention +│ │ ├── flash.rs # Flash Attention v2 +│ │ └── linear.rs # Linear attention O(n) +│ ├── graph/ +│ │ ├── gat.rs # Graph 
Attention +│ │ ├── gatv2.rs # GATv2 (dynamic) +│ │ └── sparse.rs # Sparse attention patterns +│ ├── specialized/ +│ │ ├── moe.rs # Mixture of Experts +│ │ ├── cross.rs # Cross-attention +│ │ └── sliding.rs # Sliding window +│ ├── hyperbolic/ +│ │ ├── poincare.rs # Poincaré attention +│ │ └── lorentz.rs # Lorentzian attention +│ └── operators.rs # PostgreSQL operators +``` + +## SQL Interface + +### Basic Attention Operations + +```sql +-- Create attention-weighted index +CREATE INDEX ON documents USING ruvector_attention ( + embedding vector(768) +) WITH ( + attention_type = 'flash', + num_heads = 8, + head_dim = 96 +); + +-- Attention-weighted search +SELECT id, content, + ruvector_attention_score(embedding, query_vec, 'scaled_dot') AS score +FROM documents +ORDER BY score DESC +LIMIT 10; + +-- Multi-head attention search +SELECT * FROM ruvector_mha_search( + table_name := 'documents', + query := query_embedding, + num_heads := 8, + k := 10 +); +``` + +### Advanced Attention Queries + +```sql +-- Cross-attention between two tables (Q from queries, K/V from documents) +SELECT q.id AS query_id, d.id AS doc_id, score +FROM ruvector_cross_attention( + query_table := 'queries', + query_column := 'embedding', + document_table := 'documents', + document_column := 'embedding', + attention_type := 'scaled_dot' +) AS (query_id int, doc_id int, score float); + +-- Mixture of Experts routing +SELECT id, + ruvector_moe_route(embedding, num_experts := 8, top_k := 2) AS expert_weights +FROM documents; + +-- Sliding window attention for long sequences +SELECT * FROM ruvector_sliding_attention( + embeddings := embedding_array, + window_size := 256, + stride := 128 +); +``` + +### Attention Types + +```sql +-- List available attention mechanisms +SELECT * FROM ruvector_attention_types(); + +-- Result: +-- | name | complexity | best_for | +-- |-------------------|------------|-----------------------------| +-- | scaled_dot | O(n²) | Small sequences (<512) | +-- | flash_v2 | O(n²) | 
GPU, memory-efficient | +-- | linear | O(n) | Long sequences (>4K) | +-- | sparse | O(n√n) | Very long sequences | +-- | gat | O(E) | Graph-structured data | +-- | moe | O(n*k) | Conditional computation | +-- | hyperbolic | O(n²) | Hierarchical data | +``` + +## Implementation Phases + +### Phase 1: Core Attention (Week 1-3) + +```rust +// src/attention/core/scaled_dot.rs + +use simsimd::SpatialSimilarity; + +pub struct ScaledDotAttention { + scale: f32, + dropout: Option, +} + +impl ScaledDotAttention { + pub fn new(head_dim: usize) -> Self { + Self { + scale: 1.0 / (head_dim as f32).sqrt(), + dropout: None, + } + } + + /// Compute attention scores between query and keys + /// Returns softmax(Q·K^T / √d_k) + #[inline] + pub fn attention_scores(&self, query: &[f32], keys: &[&[f32]]) -> Vec { + let mut scores: Vec = keys.iter() + .map(|k| self.dot_product(query, k) * self.scale) + .collect(); + + softmax_inplace(&mut scores); + scores + } + + /// SIMD-accelerated dot product + #[inline] + fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 { + f32::dot(a, b).unwrap_or_else(|| { + a.iter().zip(b.iter()).map(|(x, y)| x * y).sum() + }) + } +} + +// PostgreSQL function +#[pg_extern(immutable, parallel_safe)] +fn ruvector_attention_score( + query: Vec, + key: Vec, + attention_type: default!(&str, "'scaled_dot'"), +) -> f32 { + let attention = get_attention_impl(attention_type); + attention.score(&query, &key) +} +``` + +### Phase 2: Multi-Head Attention (Week 4-5) + +```rust +// src/attention/core/multi_head.rs + +pub struct MultiHeadAttention { + num_heads: usize, + head_dim: usize, + w_q: Matrix, + w_k: Matrix, + w_v: Matrix, + w_o: Matrix, +} + +impl MultiHeadAttention { + pub fn forward(&self, query: &[f32], keys: &[&[f32]], values: &[&[f32]]) -> Vec { + // Project to heads + let q_heads = self.split_heads(&self.project(query, &self.w_q)); + let k_heads: Vec<_> = keys.iter() + .map(|k| self.split_heads(&self.project(k, &self.w_k))) + .collect(); + let v_heads: Vec<_> 
= values.iter() + .map(|v| self.split_heads(&self.project(v, &self.w_v))) + .collect(); + + // Attention per head (parallelizable) + let head_outputs: Vec<Vec<f32>> = (0..self.num_heads) + .into_par_iter() + .map(|h| { + let scores = self.attention_scores(&q_heads[h], &k_heads, h); + self.weighted_sum(&scores, &v_heads, h) + }) + .collect(); + + // Concatenate and project + let concat = self.concat_heads(&head_outputs); + self.project(&concat, &self.w_o) + } +} + +// PostgreSQL set-returning function for batch attention +#[pg_extern] +fn ruvector_mha_search( + table_name: &str, + query: Vec<f32>, + num_heads: default!(i32, 8), + k: default!(i32, 10), +) -> TableIterator<'static, (name!(id, i64), name!(score, f32))> { + // Implementation using SPI +} +``` + +### Phase 3: Flash Attention (Week 6-7) + +```rust +// src/attention/core/flash.rs + +/// Flash Attention v2 - memory-efficient attention +/// Processes attention in blocks to minimize memory bandwidth +pub struct FlashAttention { + block_size_q: usize, + block_size_kv: usize, + scale: f32, +} + +impl FlashAttention { + /// Tiled attention computation + /// Memory: O(N) instead of O(N²) (the full N×N score matrix is never materialized) + pub fn forward( + &self, + q: &[f32], // [seq_len, head_dim] + k: &[f32], // [seq_len, head_dim] + v: &[f32], // [seq_len, head_dim] + ) -> Vec<f32> { + let seq_len = q.len() / self.head_dim; + let mut output = vec![0.0; q.len()]; + let mut row_max = vec![f32::NEG_INFINITY; seq_len]; + let mut row_sum = vec![0.0; seq_len]; + + // Process in blocks + for q_block in (0..seq_len).step_by(self.block_size_q) { + for kv_block in (0..seq_len).step_by(self.block_size_kv) { + self.process_block( + q, k, v, + q_block, kv_block, + &mut output, &mut row_max, &mut row_sum + ); + } + } + + output + } +} +``` + +### Phase 4: Graph Attention (Week 8-9) + +```rust +// src/attention/graph/gat.rs + +/// Graph Attention Network layer +pub struct GATLayer { + num_heads: usize, + in_features: usize, + out_features: usize, + attention_weights: Vec<Vec<f32>>, // [num_heads, 2 * 
out_features] + leaky_relu_slope: f32, +} + +impl GATLayer { + /// Compute attention coefficients for graph edges + pub fn forward( + &self, + node_features: &[Vec], // [num_nodes, in_features] + edge_index: &[(usize, usize)], // [(src, dst), ...] + ) -> Vec> { + // Transform features + let h = self.linear_transform(node_features); + + // Compute attention for each edge + let edge_attention: Vec> = edge_index.par_iter() + .map(|(src, dst)| { + (0..self.num_heads) + .map(|head| self.edge_attention(head, &h[*src], &h[*dst])) + .collect() + }) + .collect(); + + // Aggregate with attention weights + self.aggregate(&h, edge_index, &edge_attention) + } +} + +// PostgreSQL function for graph-based search +#[pg_extern] +fn ruvector_gat_search( + node_table: &str, + edge_table: &str, + query_node_id: i64, + num_heads: default!(i32, 4), + k: default!(i32, 10), +) -> TableIterator<'static, (name!(node_id, i64), name!(attention_score, f32))> { + // Implementation +} +``` + +### Phase 5: Hyperbolic Attention (Week 10-11) + +```rust +// src/attention/hyperbolic/poincare.rs + +/// Poincaré ball attention for hierarchical data +pub struct PoincareAttention { + curvature: f32, // -1/c² where c is the ball radius + head_dim: usize, +} + +impl PoincareAttention { + /// Möbius addition in Poincaré ball + fn mobius_add(&self, x: &[f32], y: &[f32]) -> Vec { + let x_norm_sq = self.norm_sq(x); + let y_norm_sq = self.norm_sq(y); + let xy_dot = self.dot(x, y); + + let c = -self.curvature; + let num_coef = 1.0 + 2.0 * c * xy_dot + c * y_norm_sq; + let denom = 1.0 + 2.0 * c * xy_dot + c * c * x_norm_sq * y_norm_sq; + + x.iter().zip(y.iter()) + .map(|(xi, yi)| (num_coef * xi + (1.0 - c * x_norm_sq) * yi) / denom) + .collect() + } + + /// Hyperbolic distance + fn distance(&self, x: &[f32], y: &[f32]) -> f32 { + let diff = self.mobius_add(x, &self.negate(y)); + let c = -self.curvature; + let norm = self.norm(&diff); + (2.0 / c.sqrt()) * (c.sqrt() * norm).atanh() + } + + /// Attention in 
hyperbolic space + pub fn attention_scores(&self, query: &[f32], keys: &[&[f32]]) -> Vec<f32> { + let distances: Vec<f32> = keys.iter() + .map(|k| -self.distance(query, k)) // Negative distance as similarity + .collect(); + + softmax(&distances) + } +} + +#[pg_extern(immutable, parallel_safe)] +fn ruvector_hyperbolic_distance( + a: Vec<f32>, + b: Vec<f32>, + curvature: default!(f32, "-1.0"), // must be negative: distance() takes sqrt(-curvature) +) -> f32 { + let attention = PoincareAttention::new(curvature, a.len()); + attention.distance(&a, &b) +} +``` + +### Phase 6: Mixture of Experts (Week 12) + +```rust +// src/attention/specialized/moe.rs + +/// Mixture of Experts with learned routing +pub struct MixtureOfExperts { + num_experts: usize, + top_k: usize, + gate: GatingNetwork, + experts: Vec, +} + +impl MixtureOfExperts { + /// Route input to top-k experts + pub fn forward(&self, input: &[f32]) -> Vec<f32> { + // Get routing weights + let gate_logits = self.gate.forward(input); + let (top_k_indices, top_k_weights) = self.top_k_gating(&gate_logits); + + // Aggregate expert outputs + let mut output = vec![0.0; self.experts[0].output_dim()]; + for (idx, weight) in top_k_indices.iter().zip(top_k_weights.iter()) { + let expert_output = self.experts[*idx].forward(input); + for (o, e) in output.iter_mut().zip(expert_output.iter()) { + *o += weight * e; + } + } + + output + } +} + +#[pg_extern] +fn ruvector_moe_route( + embedding: Vec<f32>, + num_experts: default!(i32, 8), + top_k: default!(i32, 2), +) -> pgrx::JsonB { + let moe = get_moe_model(num_experts as usize, top_k as usize); + let (indices, weights) = moe.route(&embedding); + + pgrx::JsonB(serde_json::json!({ + "expert_indices": indices, + "expert_weights": weights, + })) +} +``` + +## Attention Type Registry + +```rust +// src/attention/mod.rs + +pub enum AttentionType { + // Core + ScaledDot, + MultiHead { num_heads: usize }, + FlashV2 { block_size: usize }, + Linear, + + // Graph + GAT { num_heads: usize }, + GATv2 { num_heads: usize }, + Sparse { pattern: SparsePattern }, + + // Specialized + 
MoE { num_experts: usize, top_k: usize }, + Cross, + SlidingWindow { size: usize }, + + // Hyperbolic + Poincare { curvature: f32 }, + Lorentz { curvature: f32 }, +} + +pub fn get_attention(attention_type: AttentionType) -> Box { + match attention_type { + AttentionType::ScaledDot => Box::new(ScaledDotAttention::default()), + AttentionType::FlashV2 { block_size } => Box::new(FlashAttention::new(block_size)), + // ... etc + } +} +``` + +## Performance Optimizations + +### SIMD Acceleration + +```rust +// Use simsimd for all vector operations +use simsimd::{SpatialSimilarity, BinarySimilarity}; + +#[inline] +fn batched_dot_products(query: &[f32], keys: &[&[f32]]) -> Vec { + keys.iter() + .map(|k| f32::dot(query, k).unwrap()) + .collect() +} +``` + +### Memory Layout + +```rust +// Contiguous memory for cache efficiency +pub struct AttentionCache { + // Keys stored in column-major for efficient attention + keys: Vec, // [num_keys * head_dim] + values: Vec, // [num_keys * head_dim] + num_keys: usize, + head_dim: usize, +} +``` + +### Parallel Processing + +```rust +// Parallel attention across heads +let head_outputs: Vec<_> = (0..num_heads) + .into_par_iter() + .map(|h| compute_head_attention(h, query, keys, values)) + .collect(); +``` + +## Benchmarks + +| Operation | Sequence Length | Heads | Time (μs) | Memory | +|-----------|-----------------|-------|-----------|--------| +| ScaledDot | 512 | 8 | 45 | 2MB | +| Flash | 512 | 8 | 38 | 0.5MB | +| Linear | 4096 | 8 | 120 | 4MB | +| GAT | 1000 nodes | 4 | 85 | 1MB | +| MoE (8 experts) | 512 | 8 | 95 | 3MB | + +## Dependencies + +```toml +[dependencies] +# Link to ruvector-attention for implementations +ruvector-attention = { path = "../ruvector-attention", optional = true } + +# SIMD +simsimd = "5.9" + +# Parallel processing +rayon = "1.10" + +# Matrix operations (optional, for weight matrices) +ndarray = { version = "0.15", optional = true } +``` + +## Feature Flags + +```toml +[features] +attention = [] 
+attention-flash = ["attention"] +attention-graph = ["attention"] +attention-hyperbolic = ["attention"] +attention-moe = ["attention"] +attention-all = ["attention-flash", "attention-graph", "attention-hyperbolic", "attention-moe"] +``` diff --git a/crates/ruvector-postgres/docs/integration-plans/03-gnn-layers.md b/crates/ruvector-postgres/docs/integration-plans/03-gnn-layers.md new file mode 100644 index 00000000..c3f8fc3d --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/03-gnn-layers.md @@ -0,0 +1,669 @@ +# GNN Layers Integration Plan + +## Overview + +Integrate Graph Neural Network layers from `ruvector-gnn` into PostgreSQL, enabling graph-aware vector search, message passing, and neural graph queries directly in SQL. + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PostgreSQL Extension │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ GNN Layer Registry │ │ +│ │ ┌───────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌───────────┐ │ │ +│ │ │ GCN │ │GraphSAGE│ │ GAT │ │ GIN │ │ RuVector │ │ │ +│ │ └───┬───┘ └───┬───┘ └───┬───┘ └───┬───┘ └─────┬─────┘ │ │ +│ └──────┼─────────┼─────────┼─────────┼───────────┼────────┘ │ +│ └─────────┴─────────┴─────────┴───────────┘ │ +│ ▼ │ +│ ┌───────────────────────────┐ │ +│ │ Message Passing Engine │ │ +│ │ (SIMD + Parallel) │ │ +│ └───────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Module Structure + +``` +src/ +├── gnn/ +│ ├── mod.rs # Module exports & registry +│ ├── layers/ +│ │ ├── gcn.rs # Graph Convolutional Network +│ │ ├── graphsage.rs # GraphSAGE (sampling) +│ │ ├── gat.rs # Graph Attention Network +│ │ ├── gin.rs # Graph Isomorphism Network +│ │ └── ruvector.rs # Custom RuVector layer +│ ├── message_passing.rs # Core message passing +│ ├── aggregators.rs # Sum, Mean, Max, LSTM +│ ├── graph_store.rs # PostgreSQL 
graph storage +│ └── operators.rs # SQL operators +``` + +## SQL Interface + +### Graph Table Setup + +```sql +-- Create node table with embeddings +CREATE TABLE nodes ( + id SERIAL PRIMARY KEY, + embedding vector(256), + features jsonb +); + +-- Create edge table +CREATE TABLE edges ( + src_id INTEGER REFERENCES nodes(id), + dst_id INTEGER REFERENCES nodes(id), + weight FLOAT DEFAULT 1.0, + edge_type TEXT, + PRIMARY KEY (src_id, dst_id) +); + +-- Create GNN-enhanced index +CREATE INDEX ON nodes USING ruvector_gnn ( + embedding vector(256) +) WITH ( + edge_table = 'edges', + layer_type = 'graphsage', + num_layers = 2, + hidden_dim = 128, + aggregator = 'mean' +); +``` + +### GNN Queries + +```sql +-- GNN-enhanced similarity search (considers graph structure) +SELECT n.id, n.embedding, + ruvector_gnn_score(n.embedding, query_vec, 'edges', 2) AS score +FROM nodes n +ORDER BY score DESC +LIMIT 10; + +-- Message passing to get updated embeddings +SELECT node_id, updated_embedding +FROM ruvector_message_pass( + node_table := 'nodes', + edge_table := 'edges', + embedding_column := 'embedding', + num_hops := 2, + layer_type := 'gcn' +); + +-- Subgraph-aware search +SELECT * FROM ruvector_subgraph_search( + center_node := 42, + query_embedding := query_vec, + max_hops := 3, + k := 10 +); + +-- Node classification with GNN +SELECT node_id, + ruvector_gnn_classify(embedding, 'edges', model_name := 'node_classifier') AS class +FROM nodes; +``` + +### Graph Construction from Vectors + +```sql +-- Build k-NN graph from embeddings +SELECT ruvector_build_knn_graph( + node_table := 'nodes', + embedding_column := 'embedding', + edge_table := 'edges_knn', + k := 10, + distance_metric := 'cosine' +); + +-- Build epsilon-neighborhood graph +SELECT ruvector_build_eps_graph( + node_table := 'nodes', + embedding_column := 'embedding', + edge_table := 'edges_eps', + epsilon := 0.5 +); +``` + +## Implementation Phases + +### Phase 1: Message Passing Core (Week 1-3) + +```rust +// 
src/gnn/message_passing.rs + +/// Generic message passing framework (`Sync` so a layer can be shared across rayon workers) +pub trait MessagePassing: Sync { + /// Compute messages from neighbors + fn message(&self, x_j: &[f32], edge_attr: Option<&[f32]>) -> Vec<f32>; + + /// Aggregate messages + fn aggregate(&self, messages: &[Vec<f32>]) -> Vec<f32>; + + /// Update node embedding + fn update(&self, x_i: &[f32], aggregated: &[f32]) -> Vec<f32>; +} + +/// SIMD-optimized message passing +pub struct MessagePassingEngine { + aggregator: Aggregator, +} + +impl MessagePassingEngine { + pub fn propagate( + &self, + node_features: &[Vec<f32>], + edge_index: &[(usize, usize)], + edge_weights: Option<&[f32]>, + layer: &dyn MessagePassing, + ) -> Vec<Vec<f32>> { + let num_nodes = node_features.len(); + + // Build adjacency list of (neighbor, edge position) pairs + let adj_list = self.build_adjacency_list(edge_index, num_nodes); + + // Parallel message passing + (0..num_nodes) + .into_par_iter() + .map(|i| { + let neighbors = &adj_list[i]; + if neighbors.is_empty() { + return node_features[i].clone(); + } + + // Collect messages from neighbors; weights are looked up by edge position, not by node id + let messages: Vec<Vec<f32>> = neighbors.iter() + .map(|&(j, edge_id)| { + let edge_attr = edge_weights.map(|w| &w[edge_id..edge_id + 1]); + layer.message(&node_features[j], edge_attr) + }) + .collect(); + + // Aggregate + let aggregated = layer.aggregate(&messages); + + // Update + layer.update(&node_features[i], &aggregated) + }) + .collect() + } +} +``` + +### Phase 2: GCN Layer (Week 4-5) + +```rust +// src/gnn/layers/gcn.rs + +/// Graph Convolutional Network layer +/// H' = σ(D^(-1/2) A D^(-1/2) H W) +pub struct GCNLayer { + in_features: usize, + out_features: usize, + weights: Vec<f32>, // [in_features, out_features] + bias: Option<Vec<f32>>, + activation: Activation, +} + +impl GCNLayer { + pub fn new(in_features: usize, out_features: usize, bias: bool) -> Self { + let weights = Self::glorot_init(in_features, out_features); + Self { + in_features, + out_features, + weights, + bias: if bias { Some(vec![0.0; out_features]) } else { None }, + activation: Activation::ReLU, + } + } + + /// 
Forward pass with normalized adjacency + pub fn forward( + &self, + x: &[Vec], + edge_index: &[(usize, usize)], + edge_weights: &[f32], + ) -> Vec> { + // Transform features: XW + let transformed: Vec> = x.par_iter() + .map(|xi| self.linear_transform(xi)) + .collect(); + + // Message passing with normalized weights + let propagated = self.propagate(&transformed, edge_index, edge_weights); + + // Apply activation + propagated.into_iter() + .map(|h| self.activate(&h)) + .collect() + } + + #[inline] + fn linear_transform(&self, x: &[f32]) -> Vec { + let mut out = vec![0.0; self.out_features]; + for i in 0..self.out_features { + for j in 0..self.in_features { + out[i] += x[j] * self.weights[j * self.out_features + i]; + } + if let Some(ref bias) = self.bias { + out[i] += bias[i]; + } + } + out + } +} + +// PostgreSQL function +#[pg_extern] +fn ruvector_gcn_forward( + node_embeddings: Vec>, + edge_src: Vec, + edge_dst: Vec, + edge_weights: Vec, + out_features: i32, +) -> Vec> { + let layer = GCNLayer::new( + node_embeddings[0].len(), + out_features as usize, + true + ); + + let edges: Vec<_> = edge_src.iter() + .zip(edge_dst.iter()) + .map(|(&s, &d)| (s as usize, d as usize)) + .collect(); + + layer.forward(&node_embeddings, &edges, &edge_weights) +} +``` + +### Phase 3: GraphSAGE Layer (Week 6-7) + +```rust +// src/gnn/layers/graphsage.rs + +/// GraphSAGE with neighborhood sampling +pub struct GraphSAGELayer { + in_features: usize, + out_features: usize, + aggregator: SAGEAggregator, + sample_size: usize, + weights_self: Vec, + weights_neigh: Vec, +} + +pub enum SAGEAggregator { + Mean, + MaxPool { mlp: MLP }, + LSTM { lstm: LSTMCell }, + GCN, +} + +impl GraphSAGELayer { + pub fn forward_with_sampling( + &self, + x: &[Vec], + edge_index: &[(usize, usize)], + num_samples: usize, + ) -> Vec> { + let adj_list = build_adjacency_list(edge_index, x.len()); + + x.par_iter().enumerate() + .map(|(i, xi)| { + // Sample neighbors + let neighbors = 
self.sample_neighbors(&adj_list[i], num_samples); + + // Aggregate neighbor features + let neighbor_features: Vec<&[f32]> = neighbors.iter() + .map(|&j| x[j].as_slice()) + .collect(); + let aggregated = self.aggregate(&neighbor_features); + + // Combine self and neighbor + self.combine(xi, &aggregated) + }) + .collect() + } + + fn sample_neighbors(&self, neighbors: &[usize], k: usize) -> Vec { + if neighbors.len() <= k { + return neighbors.to_vec(); + } + // Uniform random sampling + neighbors.choose_multiple(&mut rand::thread_rng(), k) + .cloned() + .collect() + } + + fn aggregate(&self, features: &[&[f32]]) -> Vec { + match &self.aggregator { + SAGEAggregator::Mean => { + let dim = features[0].len(); + let mut result = vec![0.0; dim]; + for f in features { + for (r, &v) in result.iter_mut().zip(f.iter()) { + *r += v; + } + } + let n = features.len() as f32; + result.iter_mut().for_each(|r| *r /= n); + result + } + SAGEAggregator::MaxPool { mlp } => { + features.iter() + .map(|f| mlp.forward(f)) + .reduce(|a, b| element_wise_max(&a, &b)) + .unwrap() + } + // ... 
other aggregators + } + } +} + +#[pg_extern] +fn ruvector_graphsage_search( + node_table: &str, + edge_table: &str, + query: Vec, + num_layers: default!(i32, 2), + sample_size: default!(i32, 10), + k: default!(i32, 10), +) -> TableIterator<'static, (name!(id, i64), name!(score, f32))> { + // Implementation using SPI +} +``` + +### Phase 4: Graph Isomorphism Network (Week 8) + +```rust +// src/gnn/layers/gin.rs + +/// Graph Isomorphism Network - maximally expressive +/// h_v = MLP((1 + ε) * h_v + Σ h_u) +pub struct GINLayer { + mlp: MLP, + eps: f32, + train_eps: bool, +} + +impl GINLayer { + pub fn forward( + &self, + x: &[Vec], + edge_index: &[(usize, usize)], + ) -> Vec> { + let adj_list = build_adjacency_list(edge_index, x.len()); + + x.par_iter().enumerate() + .map(|(i, xi)| { + // Sum neighbor features + let sum_neighbors: Vec = adj_list[i].iter() + .fold(vec![0.0; xi.len()], |mut acc, &j| { + for (a, &v) in acc.iter_mut().zip(x[j].iter()) { + *a += v; + } + acc + }); + + // (1 + eps) * self + sum_neighbors + let combined: Vec = xi.iter() + .zip(sum_neighbors.iter()) + .map(|(&s, &n)| (1.0 + self.eps) * s + n) + .collect(); + + // MLP + self.mlp.forward(&combined) + }) + .collect() + } +} +``` + +### Phase 5: Custom RuVector Layer (Week 9-10) + +```rust +// src/gnn/layers/ruvector.rs + +/// RuVector's custom differentiable search layer +/// Combines HNSW navigation with learned message passing +pub struct RuVectorLayer { + in_features: usize, + out_features: usize, + num_hops: usize, + attention: MultiHeadAttention, + transform: Linear, +} + +impl RuVectorLayer { + /// Forward pass using HNSW graph structure + pub fn forward( + &self, + query: &[f32], + hnsw_index: &HnswIndex, + k_neighbors: usize, + ) -> Vec { + // Get k nearest neighbors from HNSW + let neighbors = hnsw_index.search(query, k_neighbors); + + // Multi-hop aggregation following HNSW structure + let mut current = query.to_vec(); + for hop in 0..self.num_hops { + let neighbor_features: Vec<&[f32]> 
= neighbors.iter() + .flat_map(|n| hnsw_index.get_neighbors(n.id)) + .map(|id| hnsw_index.get_vector(id)) + .collect(); + + // Attention-weighted aggregation + current = self.attention.forward(&current, &neighbor_features); + } + + self.transform.forward(&current) + } +} + +#[pg_extern] +fn ruvector_differentiable_search( + query: Vec<f32>, + index_name: &str, + num_hops: default!(i32, 2), + k: default!(i32, 10), +) -> TableIterator<'static, (name!(id, i64), name!(score, f32), name!(enhanced_embedding, Vec<f32>))> { + // Combines vector search with GNN enhancement +} +``` + +### Phase 6: Graph Storage (Week 11-12) + +```rust +// src/gnn/graph_store.rs + +/// Efficient graph storage for PostgreSQL +pub struct GraphStore { + node_embeddings: SharedMemory>, + adjacency: CompressedSparseRow, + edge_features: Option>>, +} + +impl GraphStore { + /// Load graph from PostgreSQL tables + pub fn from_tables( + node_table: &str, + embedding_column: &str, + edge_table: &str, + ) -> Result { + Spi::connect(|client| { + // Load nodes + let nodes = client.select( + &format!("SELECT id, {} FROM {}", embedding_column, node_table), + None, None + )?; + + // Load edges + let edges = client.select( + &format!("SELECT src_id, dst_id, weight FROM {}", edge_table), + None, None + )?; + + // Build CSR + let csr = CompressedSparseRow::from_edges(&edges); + + Ok(Self { + node_embeddings: SharedMemory::new(nodes), + adjacency: csr, + edge_features: None, + }) + }) + } + + /// Efficient neighbor lookup + pub fn neighbors(&self, node_id: usize) -> &[usize] { + self.adjacency.neighbors(node_id) + } +} + +/// Compressed Sparse Row format for adjacency +pub struct CompressedSparseRow { + indptr: Vec<usize>, // Row pointers + indices: Vec<usize>, // Column indices + data: Vec<f32>, // Edge weights +} +``` + +## Aggregator Functions + +```rust +// src/gnn/aggregators.rs + +pub enum Aggregator { + Sum, + Mean, + Max, + Min, + Attention { heads: usize }, + Set2Set { steps: usize }, +} + +impl Aggregator { + pub fn aggregate(&self, messages: 
&[Vec]) -> Vec { + match self { + Aggregator::Sum => Self::sum_aggregate(messages), + Aggregator::Mean => Self::mean_aggregate(messages), + Aggregator::Max => Self::max_aggregate(messages), + Aggregator::Attention { heads } => Self::attention_aggregate(messages, *heads), + _ => unimplemented!(), + } + } + + fn sum_aggregate(messages: &[Vec]) -> Vec { + let dim = messages[0].len(); + let mut result = vec![0.0; dim]; + for msg in messages { + for (r, &m) in result.iter_mut().zip(msg.iter()) { + *r += m; + } + } + result + } + + fn attention_aggregate(messages: &[Vec], heads: usize) -> Vec { + // Multi-head attention over messages + let mha = MultiHeadAttention::new(messages[0].len(), heads); + mha.aggregate(messages) + } +} +``` + +## Performance Optimizations + +### Batch Processing + +```rust +/// Process multiple nodes in parallel batches +pub fn batch_message_passing( + nodes: &[Vec], + edge_index: &[(usize, usize)], + batch_size: usize, +) -> Vec> { + nodes.par_chunks(batch_size) + .flat_map(|batch| { + // Process batch with SIMD + process_batch(batch, edge_index) + }) + .collect() +} +``` + +### Sparse Operations + +```rust +/// Sparse matrix multiplication for message passing +pub fn sparse_mm( + node_features: &[Vec], + csr: &CompressedSparseRow, +) -> Vec> { + let dim = node_features[0].len(); + let num_nodes = node_features.len(); + + (0..num_nodes).into_par_iter() + .map(|i| { + let start = csr.indptr[i]; + let end = csr.indptr[i + 1]; + + let mut result = vec![0.0; dim]; + for j in start..end { + let neighbor = csr.indices[j]; + let weight = csr.data[j]; + for (r, &f) in result.iter_mut().zip(node_features[neighbor].iter()) { + *r += weight * f; + } + } + result + }) + .collect() +} +``` + +## Benchmarks + +| Layer | Nodes | Edges | Features | Time (ms) | Memory | +|-------|-------|-------|----------|-----------|--------| +| GCN | 10K | 100K | 256 | 12 | 40MB | +| GraphSAGE | 10K | 100K | 256 | 18 | 45MB | +| GAT (4 heads) | 10K | 100K | 256 | 35 | 60MB | 
+| GIN | 10K | 100K | 256 | 15 | 42MB | +| RuVector | 10K | 100K | 256 | 25 | 55MB | + +## Dependencies + +```toml +[dependencies] +# Link to ruvector-gnn +ruvector-gnn = { path = "../ruvector-gnn", optional = true } + +# Sparse matrix +sprs = "0.11" + +# Parallel +rayon = "1.10" + +# SIMD +simsimd = "5.9" +``` + +## Feature Flags + +```toml +[features] +gnn = [] +gnn-gcn = ["gnn"] +gnn-sage = ["gnn"] +gnn-gat = ["gnn", "attention"] +gnn-gin = ["gnn"] +gnn-all = ["gnn-gcn", "gnn-sage", "gnn-gat", "gnn-gin"] +``` diff --git a/crates/ruvector-postgres/docs/integration-plans/04-hyperbolic-embeddings.md b/crates/ruvector-postgres/docs/integration-plans/04-hyperbolic-embeddings.md new file mode 100644 index 00000000..eeb03cf7 --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/04-hyperbolic-embeddings.md @@ -0,0 +1,634 @@ +# Hyperbolic Embeddings Integration Plan + +## Overview + +Integrate hyperbolic geometry operations into PostgreSQL for hierarchical data representation, enabling embeddings in Poincaré ball and Lorentz (hyperboloid) models with native distance functions and indexing. 
+ +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PostgreSQL Extension │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Hyperbolic Type System │ │ +│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │ +│ │ │ Poincaré │ │ Lorentz │ │ Klein │ │ │ +│ │ │ Ball │ │ Hyperboloid │ │ Model │ │ │ +│ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ │ +│ └─────────┼─────────────────┼─────────────────┼───────────┘ │ +│ └─────────────────┴─────────────────┘ │ +│ ▼ │ +│ ┌───────────────────────────┐ │ +│ │ Riemannian Operations │ │ +│ │ (Exponential, Log, PT) │ │ +│ └───────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Module Structure + +``` +src/ +├── hyperbolic/ +│ ├── mod.rs # Module exports +│ ├── types/ +│ │ ├── poincare.rs # Poincaré ball model +│ │ ├── lorentz.rs # Lorentz/hyperboloid model +│ │ └── klein.rs # Klein model (projective) +│ ├── manifold.rs # Manifold operations +│ ├── distance.rs # Distance functions +│ ├── index/ +│ │ ├── htree.rs # Hyperbolic tree index +│ │ └── hnsw_hyper.rs # HNSW for hyperbolic space +│ └── operators.rs # SQL operators +``` + +## SQL Interface + +### Hyperbolic Types + +```sql +-- Create hyperbolic embedding column +CREATE TABLE hierarchical_nodes ( + id SERIAL PRIMARY KEY, + name TEXT, + euclidean_embedding vector(128), + poincare_embedding hyperbolic(128), -- Poincaré ball + lorentz_embedding hyperboloid(129), -- Lorentz model (d+1 dims) + curvature FLOAT DEFAULT -1.0 +); + +-- Insert with automatic projection +INSERT INTO hierarchical_nodes (name, euclidean_embedding) +VALUES ('root', '[0.1, 0.2, ...]'); + +-- Auto-project to hyperbolic space +UPDATE hierarchical_nodes +SET poincare_embedding = ruvector_to_poincare(euclidean_embedding, curvature); +``` + +### Distance Operations + +```sql +-- Poincaré distance +SELECT id, 
name, + ruvector_poincare_distance(poincare_embedding, query_point) AS dist +FROM hierarchical_nodes +ORDER BY dist +LIMIT 10; + +-- Lorentz distance (often more numerically stable) +SELECT id, name, + ruvector_lorentz_distance(lorentz_embedding, query_point) AS dist +FROM hierarchical_nodes +ORDER BY dist +LIMIT 10; + +-- Custom curvature +SELECT ruvector_hyperbolic_distance( + a := point_a, + b := point_b, + model := 'poincare', + curvature := -0.5 +); +``` + +### Hyperbolic Operations + +```sql +-- Möbius addition (translation in Poincaré ball) +SELECT ruvector_mobius_add(point_a, point_b, curvature := -1.0); + +-- Exponential map (tangent vector → manifold point) +SELECT ruvector_exp_map(base_point, tangent_vector, curvature := -1.0); + +-- Logarithmic map (manifold point → tangent vector) +SELECT ruvector_log_map(base_point, target_point, curvature := -1.0); + +-- Parallel transport (move vector along geodesic) +SELECT ruvector_parallel_transport(vector, from_point, to_point, curvature := -1.0); + +-- Geodesic midpoint +SELECT ruvector_geodesic_midpoint(point_a, point_b); + +-- Project Euclidean to hyperbolic +SELECT ruvector_project_to_hyperbolic(euclidean_vec, model := 'poincare'); +``` + +### Hyperbolic Index + +```sql +-- Create hyperbolic HNSW index +CREATE INDEX ON hierarchical_nodes USING ruvector_hyperbolic ( + poincare_embedding hyperbolic(128) +) WITH ( + model = 'poincare', + curvature = -1.0, + m = 16, + ef_construction = 64 +); + +-- Hyperbolic k-NN search +SELECT * FROM hierarchical_nodes +ORDER BY poincare_embedding <~> query_point -- <~> is hyperbolic distance +LIMIT 10; +``` + +## Implementation Phases + +### Phase 1: Poincaré Ball Model (Week 1-3) + +```rust +// src/hyperbolic/types/poincare.rs + +use simsimd::SpatialSimilarity; + +/// Poincaré ball model B^n_c = {x ∈ R^n : c||x||² < 1} +pub struct PoincareBall { + dim: usize, + curvature: f32, // Negative curvature, typically -1.0 +} + +impl PoincareBall { + pub fn new(dim: usize, curvature: 
f32) -> Self { + assert!(curvature < 0.0, "Curvature must be negative"); + Self { dim, curvature } + } + + /// Conformal factor λ_c(x) = 2 / (1 - c||x||²) + #[inline] + fn conformal_factor(&self, x: &[f32]) -> f32 { + let c = -self.curvature; + let norm_sq = self.norm_sq(x); + 2.0 / (1.0 - c * norm_sq) + } + + /// Poincaré distance: d(x,y) = (2/√c) * arctanh(√c * ||−x ⊕_c y||) + pub fn distance(&self, x: &[f32], y: &[f32]) -> f32 { + let c = -self.curvature; + let sqrt_c = c.sqrt(); + + // Möbius addition: -x ⊕ y + let neg_x: Vec<f32> = x.iter().map(|&xi| -xi).collect(); + let mobius_sum = self.mobius_add(&neg_x, y); + let norm = self.norm(&mobius_sum); + + (2.0 / sqrt_c) * (sqrt_c * norm).atanh() + } + + /// Möbius addition in Poincaré ball + pub fn mobius_add(&self, x: &[f32], y: &[f32]) -> Vec<f32> { + let c = -self.curvature; + let x_norm_sq = self.norm_sq(x); + let y_norm_sq = self.norm_sq(y); + let xy_dot = self.dot(x, y); + + let num_coef = 1.0 + 2.0 * c * xy_dot + c * y_norm_sq; + let y_coef = 1.0 - c * x_norm_sq; + let denom = 1.0 + 2.0 * c * xy_dot + c * c * x_norm_sq * y_norm_sq; + + x.iter().zip(y.iter()) + .map(|(&xi, &yi)| (num_coef * xi + y_coef * yi) / denom) + .collect() + } + + /// Exponential map: tangent space → manifold + pub fn exp_map(&self, base: &[f32], tangent: &[f32]) -> Vec<f32> { + let c = -self.curvature; + let sqrt_c = c.sqrt(); + + let lambda = self.conformal_factor(base); + let tangent_norm = self.norm(tangent); + + if tangent_norm < 1e-10 { + return base.to_vec(); + } + + let coef = (sqrt_c * lambda * tangent_norm / 2.0).tanh() / (sqrt_c * tangent_norm); + let direction: Vec<f32> = tangent.iter().map(|&t| t * coef).collect(); + + self.mobius_add(base, &direction) + } + + /// Logarithmic map: manifold → tangent space + pub fn log_map(&self, base: &[f32], target: &[f32]) -> Vec<f32> { + let c = -self.curvature; + let sqrt_c = c.sqrt(); + + // -base ⊕ target + let neg_base: Vec<f32> = base.iter().map(|&b| -b).collect(); + let addition = self.mobius_add(&neg_base, 
target); + let add_norm = self.norm(&addition); + + if add_norm < 1e-10 { + return vec![0.0; self.dim]; + } + + let lambda = self.conformal_factor(base); + let coef = (2.0 / (sqrt_c * lambda)) * (sqrt_c * add_norm).atanh() / add_norm; + + addition.iter().map(|&a| a * coef).collect() + } + + /// Project point to ball (clamp norm) + pub fn project(&self, x: &[f32]) -> Vec { + let c = -self.curvature; + let max_norm = (1.0 / c).sqrt() - 1e-5; + let norm = self.norm(x); + + if norm <= max_norm { + x.to_vec() + } else { + let scale = max_norm / norm; + x.iter().map(|&xi| xi * scale).collect() + } + } + + #[inline] + fn norm_sq(&self, x: &[f32]) -> f32 { + f32::dot(x, x).unwrap_or_else(|| x.iter().map(|&xi| xi * xi).sum()) + } + + #[inline] + fn norm(&self, x: &[f32]) -> f32 { + self.norm_sq(x).sqrt() + } + + #[inline] + fn dot(&self, x: &[f32], y: &[f32]) -> f32 { + f32::dot(x, y).unwrap_or_else(|| x.iter().zip(y.iter()).map(|(&a, &b)| a * b).sum()) + } +} + +// PostgreSQL type +#[derive(PostgresType, Serialize, Deserialize)] +#[pgx(sql = "CREATE TYPE hyperbolic")] +pub struct Hyperbolic { + data: Vec, + curvature: f32, +} + +// PostgreSQL functions +#[pg_extern(immutable, parallel_safe)] +fn ruvector_poincare_distance(a: Vec, b: Vec, curvature: default!(f32, -1.0)) -> f32 { + let ball = PoincareBall::new(a.len(), curvature); + ball.distance(&a, &b) +} + +#[pg_extern(immutable, parallel_safe)] +fn ruvector_mobius_add(a: Vec, b: Vec, curvature: default!(f32, -1.0)) -> Vec { + let ball = PoincareBall::new(a.len(), curvature); + ball.mobius_add(&a, &b) +} + +#[pg_extern(immutable, parallel_safe)] +fn ruvector_exp_map(base: Vec, tangent: Vec, curvature: default!(f32, -1.0)) -> Vec { + let ball = PoincareBall::new(base.len(), curvature); + ball.exp_map(&base, &tangent) +} + +#[pg_extern(immutable, parallel_safe)] +fn ruvector_log_map(base: Vec, target: Vec, curvature: default!(f32, -1.0)) -> Vec { + let ball = PoincareBall::new(base.len(), curvature); + ball.log_map(&base, 
&target) +} +``` + +### Phase 2: Lorentz Model (Week 4-5) + +```rust +// src/hyperbolic/types/lorentz.rs + +/// Lorentz (hyperboloid) model: H^n = {x ∈ R^{n+1} : ⟨x,x⟩_L = -1/c, x_0 > 0} +/// More numerically stable than Poincaré for high dimensions +pub struct LorentzModel { + dim: usize, // Ambient dimension (n+1) + curvature: f32, +} + +impl LorentzModel { + /// Minkowski inner product: ⟨x,y⟩_L = -x_0*y_0 + Σ x_i*y_i + #[inline] + pub fn minkowski_dot(&self, x: &[f32], y: &[f32]) -> f32 { + -x[0] * y[0] + x[1..].iter().zip(y[1..].iter()) + .map(|(&a, &b)| a * b) + .sum::<f32>() + } + + /// Lorentz distance: d(x,y) = (1/√c) * arcosh(-c * ⟨x,y⟩_L) + pub fn distance(&self, x: &[f32], y: &[f32]) -> f32 { + let c = -self.curvature; + let sqrt_c = c.sqrt(); + let inner = self.minkowski_dot(x, y); + + (1.0 / sqrt_c) * (-c * inner).acosh() + } + + /// Exponential map on hyperboloid + pub fn exp_map(&self, base: &[f32], tangent: &[f32]) -> Vec<f32> { + let c = -self.curvature; + let sqrt_c = c.sqrt(); + + let tangent_norm_sq = self.minkowski_dot(tangent, tangent); + if tangent_norm_sq < 1e-10 { + return base.to_vec(); + } + let tangent_norm = tangent_norm_sq.sqrt(); + + let coef1 = (sqrt_c * tangent_norm).cosh(); + let coef2 = (sqrt_c * tangent_norm).sinh() / tangent_norm; + + base.iter().zip(tangent.iter()) + .map(|(&b, &t)| coef1 * b + coef2 * t) + .collect() + } + + /// Logarithmic map on hyperboloid + pub fn log_map(&self, base: &[f32], target: &[f32]) -> Vec<f32> { + let c = -self.curvature; + let sqrt_c = c.sqrt(); + + let inner = self.minkowski_dot(base, target); + let dist = self.distance(base, target); + + if dist < 1e-10 { + return vec![0.0; self.dim]; + } + + let coef = dist / (dist * sqrt_c).sinh(); + + target.iter().zip(base.iter()) + .map(|(&t, &b)| coef * (t - inner * b)) + .collect() + } + + /// Project to hyperboloid (ensure constraint satisfied) + pub fn project(&self, x: &[f32]) -> Vec<f32> { + let c = -self.curvature; + let space_norm_sq: f32 = x[1..].iter().map(|&xi| xi * xi).sum(); +
let x0 = ((1.0 / c) + space_norm_sq).sqrt(); + + let mut result = vec![x0]; + result.extend_from_slice(&x[1..]); + result + } + + /// Convert from Poincaré ball to Lorentz + pub fn from_poincare(&self, poincare: &[f32], poincare_curvature: f32) -> Vec { + let c = -poincare_curvature; + let norm_sq: f32 = poincare.iter().map(|&x| x * x).sum(); + + let x0 = (1.0 + c * norm_sq) / (1.0 - c * norm_sq); + let coef = 2.0 / (1.0 - c * norm_sq); + + let mut result = vec![x0]; + result.extend(poincare.iter().map(|&p| coef * p)); + result + } + + /// Convert from Lorentz to Poincaré ball + pub fn to_poincare(&self, lorentz: &[f32]) -> Vec { + let denom = 1.0 + lorentz[0]; + lorentz[1..].iter().map(|&x| x / denom).collect() + } +} + +#[pg_extern(immutable, parallel_safe)] +fn ruvector_lorentz_distance(a: Vec, b: Vec, curvature: default!(f32, -1.0)) -> f32 { + let model = LorentzModel::new(a.len(), curvature); + model.distance(&a, &b) +} + +#[pg_extern(immutable, parallel_safe)] +fn ruvector_poincare_to_lorentz(poincare: Vec, curvature: default!(f32, -1.0)) -> Vec { + let model = LorentzModel::new(poincare.len() + 1, curvature); + model.from_poincare(&poincare, curvature) +} + +#[pg_extern(immutable, parallel_safe)] +fn ruvector_lorentz_to_poincare(lorentz: Vec) -> Vec { + let model = LorentzModel::new(lorentz.len(), -1.0); + model.to_poincare(&lorentz) +} +``` + +### Phase 3: Hyperbolic HNSW Index (Week 6-8) + +```rust +// src/hyperbolic/index/hnsw_hyper.rs + +/// HNSW index adapted for hyperbolic space +pub struct HyperbolicHnsw { + layers: Vec, + manifold: HyperbolicManifold, + m: usize, + ef_construction: usize, +} + +pub enum HyperbolicManifold { + Poincare(PoincareBall), + Lorentz(LorentzModel), +} + +impl HyperbolicHnsw { + /// Distance function based on manifold + fn distance(&self, a: &[f32], b: &[f32]) -> f32 { + match &self.manifold { + HyperbolicManifold::Poincare(ball) => ball.distance(a, b), + HyperbolicManifold::Lorentz(model) => model.distance(a, b), + } + } + + 
/// Insert with hyperbolic distance + pub fn insert(&mut self, id: u64, vector: &[f32]) { + // Project to manifold first + let projected = match &self.manifold { + HyperbolicManifold::Poincare(ball) => ball.project(vector), + HyperbolicManifold::Lorentz(model) => model.project(vector), + }; + + // Standard HNSW insertion with hyperbolic distance + let entry_point = self.entry_point(); + let level = self.random_level(); + + for l in (0..=level).rev() { + let candidates = self.search_layer(&projected, entry_point, self.ef_construction, l); + let neighbors = self.select_neighbors(&projected, &candidates, self.m); + self.connect(id, &neighbors, l); + } + + self.vectors.insert(id, projected); + } + + /// Search with hyperbolic distance + pub fn search(&self, query: &[f32], k: usize, ef: usize) -> Vec<(u64, f32)> { + let projected = match &self.manifold { + HyperbolicManifold::Poincare(ball) => ball.project(query), + HyperbolicManifold::Lorentz(model) => model.project(query), + }; + + let mut candidates = self.search_layer(&projected, self.entry_point(), ef, 0); + candidates.truncate(k); + candidates + } +} + +// PostgreSQL index access method +#[pg_extern] +fn ruvector_hyperbolic_hnsw_handler(internal: Internal) -> Internal { + // Index AM handler +} +``` + +### Phase 4: Euclidean to Hyperbolic Projection (Week 9-10) + +```rust +// src/hyperbolic/manifold.rs + +/// Project Euclidean embeddings to hyperbolic space +pub struct HyperbolicProjection { + model: HyperbolicModel, + method: ProjectionMethod, +} + +pub enum ProjectionMethod { + /// Direct scaling to fit in ball + Scale, + /// Learned exponential map from origin + ExponentialMap, + /// Centroid-based projection + Centroid { centroid: Vec }, +} + +impl HyperbolicProjection { + /// Project batch of Euclidean vectors + pub fn project_batch(&self, vectors: &[Vec]) -> Vec> { + match &self.method { + ProjectionMethod::Scale => { + vectors.par_iter() + .map(|v| self.scale_project(v)) + .collect() + } + 
ProjectionMethod::ExponentialMap => { + let origin = vec![0.0; vectors[0].len()]; + vectors.par_iter() + .map(|v| self.model.exp_map(&origin, v)) + .collect() + } + ProjectionMethod::Centroid { centroid } => { + vectors.par_iter() + .map(|v| { + let tangent: Vec = v.iter() + .zip(centroid.iter()) + .map(|(&vi, &ci)| vi - ci) + .collect(); + self.model.exp_map(centroid, &tangent) + }) + .collect() + } + } + } + + fn scale_project(&self, v: &[f32]) -> Vec { + let norm: f32 = v.iter().map(|&x| x * x).sum::().sqrt(); + let max_norm = 0.99; // Stay within ball + + if norm <= max_norm { + v.to_vec() + } else { + let scale = max_norm / norm; + v.iter().map(|&x| x * scale).collect() + } + } +} + +#[pg_extern] +fn ruvector_to_poincare( + euclidean: Vec, + curvature: default!(f32, -1.0), + method: default!(&str, "'scale'"), +) -> Vec { + let model = PoincareBall::new(euclidean.len(), curvature); + let projection = HyperbolicProjection::new(model, method.into()); + projection.project(&euclidean) +} + +#[pg_extern] +fn ruvector_batch_to_poincare( + table_name: &str, + euclidean_column: &str, + output_column: &str, + curvature: default!(f32, -1.0), +) -> i64 { + // Batch projection using SPI + Spi::connect(|client| { + // ... 
batch update + }) +} +``` + +## Use Cases + +### Hierarchical Data (Taxonomies, Org Charts) + +```sql +-- Embed taxonomy with parent-child relationships preserved +-- Children naturally cluster closer to parents in hyperbolic space +CREATE TABLE taxonomy ( + id SERIAL PRIMARY KEY, + name TEXT, + parent_id INTEGER REFERENCES taxonomy(id), + embedding hyperbolic(64) +); + +-- Find all items in subtree (leveraging hyperbolic geometry) +SELECT * FROM taxonomy +WHERE ruvector_poincare_distance(embedding, root_embedding) < subtree_radius +ORDER BY ruvector_poincare_distance(embedding, root_embedding); +``` + +### Knowledge Graphs + +```sql +-- Entities with hierarchical relationships +-- Hyperbolic space captures asymmetric relations naturally +SELECT entity_a.name, entity_b.name, + ruvector_poincare_distance(entity_a.embedding, entity_b.embedding) AS distance +FROM entities entity_a, entities entity_b +WHERE entity_a.id != entity_b.id +ORDER BY distance +LIMIT 100; +``` + +## Benchmarks + +| Operation | Dimension | Curvature | Time (μs) | vs Euclidean | +|-----------|-----------|-----------|-----------|--------------| +| Poincaré Distance | 128 | -1.0 | 2.1 | 1.8x slower | +| Lorentz Distance | 129 | -1.0 | 1.5 | 1.3x slower | +| Möbius Addition | 128 | -1.0 | 3.2 | N/A | +| Exp Map | 128 | -1.0 | 4.5 | N/A | +| HNSW Search (hyper) | 128 | -1.0 | 850 | 1.5x slower | + +## Dependencies + +```toml +[dependencies] +# SIMD for fast operations +simsimd = "5.9" + +# Numerical stability +num-traits = "0.2" +``` + +## Feature Flags + +```toml +[features] +hyperbolic = [] +hyperbolic-poincare = ["hyperbolic"] +hyperbolic-lorentz = ["hyperbolic"] +hyperbolic-index = ["hyperbolic", "index-hnsw"] +hyperbolic-all = ["hyperbolic-poincare", "hyperbolic-lorentz", "hyperbolic-index"] +``` diff --git a/crates/ruvector-postgres/docs/integration-plans/05-sparse-vectors.md b/crates/ruvector-postgres/docs/integration-plans/05-sparse-vectors.md new file mode 100644 index 00000000..5d14ce23 
--- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/05-sparse-vectors.md @@ -0,0 +1,703 @@ +# Sparse Vectors Integration Plan + +## Overview + +Integrate sparse vector support into PostgreSQL for efficient storage and search of high-dimensional sparse embeddings (BM25, SPLADE, learned sparse representations). + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PostgreSQL Extension │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Sparse Vector Type │ │ +│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │ +│ │ │ COO Format │ │ CSR Format │ │ Dictionary │ │ │ +│ │ │ (indices, │ │ (sorted, │ │ (hash-based │ │ │ +│ │ │ values) │ │ compact) │ │ lookup) │ │ │ +│ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ │ +│ └─────────┼─────────────────┼─────────────────┼───────────┘ │ +│ └─────────────────┴─────────────────┘ │ +│ ▼ │ +│ ┌───────────────────────────┐ │ +│ │ Sparse Distance Funcs │ │ +│ │ (Dot, Cosine, BM25) │ │ +│ └───────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Module Structure + +``` +src/ +├── sparse/ +│ ├── mod.rs # Module exports +│ ├── types/ +│ │ ├── sparsevec.rs # Core sparse vector type +│ │ ├── coo.rs # COO format (coordinate) +│ │ └── csr.rs # CSR format (compressed sparse row) +│ ├── distance.rs # Sparse distance functions +│ ├── index/ +│ │ ├── inverted.rs # Inverted index for sparse search +│ │ └── sparse_hnsw.rs # HNSW adapted for sparse vectors +│ ├── hybrid.rs # Dense + sparse hybrid search +│ └── operators.rs # SQL operators +``` + +## SQL Interface + +### Sparse Vector Type + +```sql +-- Create table with sparse vectors +CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + content TEXT, + dense_embedding vector(768), + sparse_embedding sparsevec(30000), -- BM25 or SPLADE + metadata jsonb +); + +-- Insert sparse vector 
(indices:values format) +INSERT INTO documents (content, sparse_embedding) +VALUES ( + 'Machine learning for natural language processing', + '{1024:0.5, 2048:0.3, 4096:0.8, 15000:0.2}'::sparsevec +); + +-- Insert from array representation +INSERT INTO documents (sparse_embedding) +VALUES (ruvector_to_sparse( + indices := ARRAY[1024, 2048, 4096, 15000], + values := ARRAY[0.5, 0.3, 0.8, 0.2], + dim := 30000 +)); +``` + +### Distance Operations + +```sql +-- Sparse dot product (inner product similarity) +SELECT id, content, + ruvector_sparse_dot(sparse_embedding, query_sparse) AS score +FROM documents +ORDER BY score DESC +LIMIT 10; + +-- Sparse cosine similarity +SELECT id, + ruvector_sparse_cosine(sparse_embedding, query_sparse) AS similarity +FROM documents +WHERE ruvector_sparse_cosine(sparse_embedding, query_sparse) > 0.5; + +-- Custom operator: <#> for sparse inner product +SELECT * FROM documents +ORDER BY sparse_embedding <#> query_sparse DESC +LIMIT 10; +``` + +### Sparse Index + +```sql +-- Create inverted index for sparse vectors +CREATE INDEX ON documents USING ruvector_sparse ( + sparse_embedding sparsevec(30000) +) WITH ( + pruning_threshold = 0.1, -- Prune low-weight terms + quantization = 'int8' -- Optional quantization +); + +-- Approximate sparse search +SELECT * FROM documents +ORDER BY sparse_embedding <#> query_sparse +LIMIT 10; +``` + +### Hybrid Dense + Sparse Search + +```sql +-- Hybrid search combining dense and sparse +SELECT id, content, + 0.7 * (1 - (dense_embedding <=> query_dense)) + + 0.3 * ruvector_sparse_dot(sparse_embedding, query_sparse) AS hybrid_score +FROM documents +ORDER BY hybrid_score DESC +LIMIT 10; + +-- Built-in hybrid search function +SELECT * FROM ruvector_hybrid_search( + table_name := 'documents', + dense_column := 'dense_embedding', + sparse_column := 'sparse_embedding', + dense_query := query_dense, + sparse_query := query_sparse, + dense_weight := 0.7, + sparse_weight := 0.3, + k := 10 +); +``` + +## Implementation 
Phases + +### Phase 1: Sparse Vector Type (Week 1-2) + +```rust +// src/sparse/types/sparsevec.rs + +use pgrx::prelude::*; +use serde::{Serialize, Deserialize}; + +/// Sparse vector stored as sorted (index, value) pairs +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SparseVec { + indices: Vec, + values: Vec, + dim: u32, +} + +impl SparseVec { + pub fn new(indices: Vec, values: Vec, dim: u32) -> Result { + if indices.len() != values.len() { + return Err(SparseError::LengthMismatch); + } + + // Ensure sorted and unique + let mut pairs: Vec<_> = indices.into_iter().zip(values.into_iter()).collect(); + pairs.sort_by_key(|(i, _)| *i); + pairs.dedup_by_key(|(i, _)| *i); + + let (indices, values): (Vec<_>, Vec<_>) = pairs.into_iter().unzip(); + + if indices.last().map_or(false, |&i| i >= dim) { + return Err(SparseError::IndexOutOfBounds); + } + + Ok(Self { indices, values, dim }) + } + + /// Number of non-zero elements + #[inline] + pub fn nnz(&self) -> usize { + self.indices.len() + } + + /// Get value at index (O(log n) binary search) + pub fn get(&self, index: u32) -> f32 { + match self.indices.binary_search(&index) { + Ok(pos) => self.values[pos], + Err(_) => 0.0, + } + } + + /// Iterate over non-zero elements + pub fn iter(&self) -> impl Iterator + '_ { + self.indices.iter().copied().zip(self.values.iter().copied()) + } + + /// L2 norm + pub fn norm(&self) -> f32 { + self.values.iter().map(|&v| v * v).sum::().sqrt() + } + + /// Prune elements below threshold + pub fn prune(&mut self, threshold: f32) { + let pairs: Vec<_> = self.indices.iter().copied() + .zip(self.values.iter().copied()) + .filter(|(_, v)| v.abs() >= threshold) + .collect(); + + self.indices = pairs.iter().map(|(i, _)| *i).collect(); + self.values = pairs.iter().map(|(_, v)| *v).collect(); + } + + /// Top-k sparsification + pub fn top_k(&self, k: usize) -> SparseVec { + let mut indexed: Vec<_> = self.indices.iter().copied() + .zip(self.values.iter().copied()) + .collect(); + + 
indexed.sort_by(|(_, a), (_, b)| b.abs().partial_cmp(&a.abs()).unwrap()); + indexed.truncate(k); + indexed.sort_by_key(|(i, _)| *i); + + let (indices, values): (Vec<_>, Vec<_>) = indexed.into_iter().unzip(); + + SparseVec { indices, values, dim: self.dim } + } +} + +// PostgreSQL type registration +#[derive(PostgresType, Serialize, Deserialize)] +#[pgx(sql = "CREATE TYPE sparsevec")] +pub struct PgSparseVec(SparseVec); + +impl FromDatum for PgSparseVec { + // ... TOAST-aware deserialization +} + +impl IntoDatum for PgSparseVec { + // ... serialization +} + +// Parse from string: '{1:0.5, 2:0.3}' +impl std::str::FromStr for SparseVec { + type Err = SparseError; + + fn from_str(s: &str) -> Result { + let s = s.trim().trim_start_matches('{').trim_end_matches('}'); + let mut indices = Vec::new(); + let mut values = Vec::new(); + let mut max_index = 0u32; + + for pair in s.split(',') { + let parts: Vec<_> = pair.trim().split(':').collect(); + if parts.len() != 2 { + return Err(SparseError::ParseError); + } + let idx: u32 = parts[0].trim().parse().map_err(|_| SparseError::ParseError)?; + let val: f32 = parts[1].trim().parse().map_err(|_| SparseError::ParseError)?; + indices.push(idx); + values.push(val); + max_index = max_index.max(idx); + } + + SparseVec::new(indices, values, max_index + 1) + } +} +``` + +### Phase 2: Sparse Distance Functions (Week 3-4) + +```rust +// src/sparse/distance.rs + +use simsimd::SpatialSimilarity; + +/// Sparse dot product (inner product) +/// Only iterates over shared non-zero indices +pub fn sparse_dot(a: &SparseVec, b: &SparseVec) -> f32 { + let mut result = 0.0; + let mut i = 0; + let mut j = 0; + + while i < a.indices.len() && j < b.indices.len() { + match a.indices[i].cmp(&b.indices[j]) { + std::cmp::Ordering::Less => i += 1, + std::cmp::Ordering::Greater => j += 1, + std::cmp::Ordering::Equal => { + result += a.values[i] * b.values[j]; + i += 1; + j += 1; + } + } + } + + result +} + +/// Sparse cosine similarity +pub fn 
sparse_cosine(a: &SparseVec, b: &SparseVec) -> f32 { + let dot = sparse_dot(a, b); + let norm_a = a.norm(); + let norm_b = b.norm(); + + if norm_a == 0.0 || norm_b == 0.0 { + return 0.0; + } + + dot / (norm_a * norm_b) +} + +/// Sparse Euclidean distance +pub fn sparse_euclidean(a: &SparseVec, b: &SparseVec) -> f32 { + let mut result = 0.0; + let mut i = 0; + let mut j = 0; + + while i < a.indices.len() || j < b.indices.len() { + let idx_a = a.indices.get(i).copied().unwrap_or(u32::MAX); + let idx_b = b.indices.get(j).copied().unwrap_or(u32::MAX); + + match idx_a.cmp(&idx_b) { + std::cmp::Ordering::Less => { + result += a.values[i] * a.values[i]; + i += 1; + } + std::cmp::Ordering::Greater => { + result += b.values[j] * b.values[j]; + j += 1; + } + std::cmp::Ordering::Equal => { + let diff = a.values[i] - b.values[j]; + result += diff * diff; + i += 1; + j += 1; + } + } + } + + result.sqrt() +} + +/// BM25 scoring for sparse term vectors +pub fn sparse_bm25( + query: &SparseVec, + doc: &SparseVec, + doc_len: f32, + avg_doc_len: f32, + k1: f32, + b: f32, +) -> f32 { + let mut score = 0.0; + let mut i = 0; + let mut j = 0; + + while i < query.indices.len() && j < doc.indices.len() { + match query.indices[i].cmp(&doc.indices[j]) { + std::cmp::Ordering::Less => i += 1, + std::cmp::Ordering::Greater => j += 1, + std::cmp::Ordering::Equal => { + let idf = query.values[i]; // Assume query values are IDF weights + let tf = doc.values[j]; // Doc values are TF + + let numerator = tf * (k1 + 1.0); + let denominator = tf + k1 * (1.0 - b + b * doc_len / avg_doc_len); + + score += idf * numerator / denominator; + i += 1; + j += 1; + } + } + } + + score +} + +// PostgreSQL functions +#[pg_extern(immutable, parallel_safe)] +fn ruvector_sparse_dot(a: PgSparseVec, b: PgSparseVec) -> f32 { + sparse_dot(&a.0, &b.0) +} + +#[pg_extern(immutable, parallel_safe)] +fn ruvector_sparse_cosine(a: PgSparseVec, b: PgSparseVec) -> f32 { + sparse_cosine(&a.0, &b.0) +} + +#[pg_extern(immutable, 
parallel_safe)] +fn ruvector_sparse_euclidean(a: PgSparseVec, b: PgSparseVec) -> f32 { + sparse_euclidean(&a.0, &b.0) +} +``` + +### Phase 3: Inverted Index (Week 5-7) + +```rust +// src/sparse/index/inverted.rs + +use dashmap::DashMap; +use parking_lot::RwLock; + +/// Inverted index for efficient sparse vector search +pub struct InvertedIndex { + /// term_id -> [(doc_id, weight), ...] + postings: DashMap<u32, Vec<(u64, f32)>>, + /// doc_id -> sparse vector (for re-ranking) + documents: DashMap<u64, SparseVec>, + /// Document norms for cosine similarity + doc_norms: DashMap<u64, f32>, + /// Configuration + config: InvertedIndexConfig, +} + +pub struct InvertedIndexConfig { + pub pruning_threshold: f32, + pub max_postings_per_term: usize, + pub quantization: Option, +} + +impl InvertedIndex { + pub fn new(config: InvertedIndexConfig) -> Self { + Self { + postings: DashMap::new(), + documents: DashMap::new(), + doc_norms: DashMap::new(), + config, + } + } + + /// Insert document into index + pub fn insert(&self, doc_id: u64, vector: SparseVec) { + let norm = vector.norm(); + + // Index each term whose weight magnitude reaches the pruning threshold + for (term_id, weight) in vector.iter() { + if weight.abs() < self.config.pruning_threshold { + continue; + } + + self.postings + .entry(term_id) + .or_insert_with(Vec::new) + .push((doc_id, weight)); + } + + self.doc_norms.insert(doc_id, norm); + self.documents.insert(doc_id, vector); + } + + /// Exhaustive top-k search: accumulates a dot-product score for every document sharing a query term (no WAND pruning) + pub fn search(&self, query: &SparseVec, k: usize) -> Vec<(u64, f32)> { + // Collect candidate documents + let mut doc_scores: HashMap<u64, f32> = HashMap::new(); + + for (term_id, query_weight) in query.iter() { + if let Some(postings) = self.postings.get(&term_id) { + for &(doc_id, doc_weight) in postings.iter() { + *doc_scores.entry(doc_id).or_insert(0.0) += query_weight * doc_weight; + } + } + } + + // Get top-k + let mut results: Vec<_> = doc_scores.into_iter().collect(); + results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + results.truncate(k); + + results + } + + /// WAND
(Weak AND) algorithm for efficient top-k retrieval + pub fn search_wand(&self, query: &SparseVec, k: usize) -> Vec<(u64, f32)> { + // Sort query terms by max contribution (upper bound; valid for non-negative weights, since the fold starts at 0.0) + let mut term_info: Vec<_> = query.iter() + .filter_map(|(term_id, weight)| { + self.postings.get(&term_id).map(|p| { + let max_doc_weight = p.iter().map(|(_, w)| *w).fold(0.0f32, f32::max); + (term_id, weight, max_doc_weight * weight) + }) + }) + .collect(); + + term_info.sort_by(|(_, _, a), (_, _, b)| b.partial_cmp(a).unwrap()); + + // WAND traversal + let mut heap: BinaryHeap<(OrderedFloat<f32>, u64)> = BinaryHeap::new(); + let threshold = 0.0f32; + + // ... WAND implementation + + heap.into_iter().map(|(s, id)| (id, s.0)).collect() + } +} + +// PostgreSQL index access method +#[pg_extern] +fn ruvector_sparse_handler(internal: Internal) -> Internal { + // Index AM handler for sparse inverted index +} +``` + +### Phase 4: Hybrid Search (Week 8-9) + +```rust +// src/sparse/hybrid.rs + +/// Hybrid dense + sparse search +pub struct HybridSearch { + dense_weight: f32, + sparse_weight: f32, + fusion_method: FusionMethod, +} + +pub enum FusionMethod { + /// Linear combination of scores + Linear, + /// Reciprocal Rank Fusion + RRF { k: f32 }, + /// Learned fusion weights + Learned { model: FusionModel }, +} + +impl HybridSearch { + /// Combine dense and sparse results + pub fn search( + &self, + dense_results: &[(u64, f32)], + sparse_results: &[(u64, f32)], + k: usize, + ) -> Vec<(u64, f32)> { + match &self.fusion_method { + FusionMethod::Linear => { + self.linear_fusion(dense_results, sparse_results, k) + } + FusionMethod::RRF { k: rrf_k } => { + self.rrf_fusion(dense_results, sparse_results, k, *rrf_k) + } + FusionMethod::Learned { model } => { + model.fuse(dense_results, sparse_results, k) + } + } + } + + fn linear_fusion( + &self, + dense: &[(u64, f32)], + sparse: &[(u64, f32)], + k: usize, + ) -> Vec<(u64, f32)> { + let mut scores: HashMap<u64, f32> = HashMap::new(); + + // Normalize dense scores
to [0, 1] + let dense_max = dense.iter().map(|(_, s)| *s).fold(0.0f32, f32::max); + for (id, score) in dense { + let normalized = if dense_max > 0.0 { score / dense_max } else { 0.0 }; + *scores.entry(*id).or_insert(0.0) += self.dense_weight * normalized; + } + + // Normalize sparse scores to [0, 1] + let sparse_max = sparse.iter().map(|(_, s)| *s).fold(0.0f32, f32::max); + for (id, score) in sparse { + let normalized = if sparse_max > 0.0 { score / sparse_max } else { 0.0 }; + *scores.entry(*id).or_insert(0.0) += self.sparse_weight * normalized; + } + + let mut results: Vec<_> = scores.into_iter().collect(); + results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + results.truncate(k); + results + } + + fn rrf_fusion( + &self, + dense: &[(u64, f32)], + sparse: &[(u64, f32)], + k: usize, + rrf_k: f32, + ) -> Vec<(u64, f32)> { + let mut scores: HashMap<u64, f32> = HashMap::new(); + + // RRF: weight / (rrf_k + rank), rank being 1-based (enumerate() is 0-based, hence the + 1.0) + for (rank, (id, _)) in dense.iter().enumerate() { + *scores.entry(*id).or_insert(0.0) += self.dense_weight / (rrf_k + rank as f32 + 1.0); + } + + for (rank, (id, _)) in sparse.iter().enumerate() { + *scores.entry(*id).or_insert(0.0) += self.sparse_weight / (rrf_k + rank as f32 + 1.0); + } + + let mut results: Vec<_> = scores.into_iter().collect(); + results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + results.truncate(k); + results + } +} + +#[pg_extern] +fn ruvector_hybrid_search( + table_name: &str, + dense_column: &str, + sparse_column: &str, + dense_query: Vec<f32>, + sparse_query: PgSparseVec, + dense_weight: default!(f32, 0.7), + sparse_weight: default!(f32, 0.3), + k: default!(i32, 10), + fusion: default!(&str, "'linear'"), +) -> TableIterator<'static, (name!(id, i64), name!(score, f32))> { + // Implementation using SPI +} +``` + +### Phase 5: SPLADE Integration (Week 10) + +```rust +// src/sparse/splade.rs + +/// SPLADE-style learned sparse representations +pub struct SpladeEncoder { + /// Vocab size for term indices + vocab_size: usize, + /// Sparsity
threshold + threshold: f32, +} + +impl SpladeEncoder { + /// Convert per-term logits to a SPLADE-style sparse vector + /// (typically done externally, but we support post-processing) + pub fn sparsify(&self, logits: &[f32]) -> SparseVec { + let mut indices = Vec::new(); + let mut values = Vec::new(); + + for (i, &logit) in logits.iter().enumerate() { + // ReLU + log(1 + x) activation + if logit > 0.0 { + let value = (1.0 + logit).ln(); + if value > self.threshold { + indices.push(i as u32); + values.push(value); + } + } + } + + SparseVec::new(indices, values, self.vocab_size as u32).unwrap() + } +} + +#[pg_extern] +fn ruvector_to_sparse( + indices: Vec<i32>, + values: Vec<f32>, + dim: i32, +) -> PgSparseVec { + let indices: Vec<u32> = indices.into_iter().map(|i| i as u32).collect(); + PgSparseVec(SparseVec::new(indices, values, dim as u32).unwrap()) +} + +#[pg_extern] +fn ruvector_sparse_top_k(sparse: PgSparseVec, k: i32) -> PgSparseVec { + PgSparseVec(sparse.0.top_k(k as usize)) +} + +#[pg_extern] +fn ruvector_sparse_prune(sparse: PgSparseVec, threshold: f32) -> PgSparseVec { + let mut result = sparse.0.clone(); + result.prune(threshold); + PgSparseVec(result) +} +``` + +## Benchmarks + +| Operation | NNZ (query) | NNZ (doc) | Dim | Time (μs) | +|-----------|-------------|-----------|-----|-----------| +| Dot Product | 100 | 100 | 30K | 0.8 | +| Cosine | 100 | 100 | 30K | 1.2 | +| Inverted Search | 100 | - | 30K | 450 | +| Hybrid Search | 100 | 768 | 30K | 1200 | + +## Dependencies + +```toml +[dependencies] +# Concurrent collections +dashmap = "6.0" + +# Ordered floats for heaps +ordered-float = "4.2" + +# Serialization +serde = { version = "1.0", features = ["derive"] } +bincode = "2.0.0-rc.3" +``` + +## Feature Flags + +```toml +[features] +sparse = [] +sparse-inverted = ["sparse"] +sparse-hybrid = ["sparse"] +sparse-all = ["sparse-inverted", "sparse-hybrid"] +``` diff --git a/crates/ruvector-postgres/docs/integration-plans/06-graph-operations.md
b/crates/ruvector-postgres/docs/integration-plans/06-graph-operations.md new file mode 100644 index 00000000..cda03551 --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/06-graph-operations.md @@ -0,0 +1,954 @@ +# Graph Operations & Cypher Integration Plan + +## Overview + +Integrate graph database capabilities from `ruvector-graph` into PostgreSQL, enabling Cypher query language support, property graph operations, and vector-enhanced graph traversals directly in SQL. + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PostgreSQL Extension │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Cypher Engine │ │ +│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌─────────┐ │ │ +│ │ │ Parser │→│ Planner │→│ Executor │→│ Result │ │ │ +│ │ └──────────┘ └──────────┘ └──────────┘ └─────────┘ │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Property Graph Store │ │ +│ │ ┌───────────┐ ┌───────────┐ ┌───────────────────┐ │ │ +│ │ │ Nodes │ │ Edges │ │ Vector Embeddings │ │ │ +│ │ │ (Labels) │ │ (Types) │ │ (HNSW Index) │ │ │ +│ │ └───────────┘ └───────────┘ └───────────────────┘ │ │ +│ └─────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Module Structure + +``` +src/ +├── graph/ +│ ├── mod.rs # Module exports +│ ├── cypher/ +│ │ ├── parser.rs # Cypher parser (pest/nom) +│ │ ├── ast.rs # Abstract syntax tree +│ │ ├── planner.rs # Query planner +│ │ ├── executor.rs # Query executor +│ │ └── functions.rs # Built-in Cypher functions +│ ├── storage/ +│ │ ├── nodes.rs # Node storage +│ │ ├── edges.rs # Edge storage +│ │ └── properties.rs # Property storage +│ ├── traversal/ +│ │ ├── bfs.rs # Breadth-first search +│ │ ├── dfs.rs # Depth-first search +│ │ ├── 
shortest_path.rs # Shortest path algorithms +│ │ └── vector_walk.rs # Vector-guided traversal +│ ├── index/ +│ │ ├── label_index.rs # Label-based index +│ │ └── property_index.rs # Property index +│ └── operators.rs # SQL operators +``` + +## SQL Interface + +### Graph Schema Setup + +```sql +-- Create a property graph +SELECT ruvector_create_graph('social_network'); + +-- Define node labels +SELECT ruvector_create_node_label('social_network', 'Person', + properties := '{ + "name": "text", + "age": "integer", + "embedding": "vector(768)" + }' +); + +SELECT ruvector_create_node_label('social_network', 'Company', + properties := '{ + "name": "text", + "industry": "text", + "embedding": "vector(768)" + }' +); + +-- Define edge types +SELECT ruvector_create_edge_type('social_network', 'KNOWS', + properties := '{"since": "date", "strength": "float"}' +); + +SELECT ruvector_create_edge_type('social_network', 'WORKS_AT', + properties := '{"role": "text", "since": "date"}' +); +``` + +### Cypher Queries + +```sql +-- Execute Cypher queries +SELECT * FROM ruvector_cypher('social_network', $$ + MATCH (p:Person)-[:KNOWS]->(friend:Person) + WHERE p.name = 'Alice' + RETURN friend.name, friend.age +$$); + +-- Create nodes +SELECT ruvector_cypher('social_network', $$ + CREATE (p:Person {name: 'Bob', age: 30, embedding: $embedding}) + RETURN p +$$, params := '{"embedding": [0.1, 0.2, ...]}'); + +-- Create relationships +SELECT ruvector_cypher('social_network', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: date('2024-01-15'), strength: 0.8}]->(b) +$$); + +-- Pattern matching +SELECT * FROM ruvector_cypher('social_network', $$ + MATCH (p:Person)-[:WORKS_AT]->(c:Company {industry: 'Tech'}) + RETURN p.name, c.name + ORDER BY p.age DESC + LIMIT 10 +$$); +``` + +### Vector-Enhanced Graph Queries + +```sql +-- Find similar nodes using vector search + graph structure +SELECT * FROM ruvector_cypher('social_network', $$ + MATCH (p:Person) + 
WHERE ruvector.similarity(p.embedding, $query) > 0.8 + RETURN p.name, p.age, ruvector.similarity(p.embedding, $query) AS similarity + ORDER BY similarity DESC + LIMIT 10 +$$, params := '{"query": [0.1, 0.2, ...]}'); + +-- Graph-aware semantic search +SELECT * FROM ruvector_cypher('social_network', $$ + MATCH (p:Person)-[:KNOWS*1..3]->(friend:Person) + WHERE p.name = 'Alice' + WITH friend, ruvector.similarity(friend.embedding, $query) AS sim + WHERE sim > 0.7 + RETURN friend.name, sim + ORDER BY sim DESC +$$, params := '{"query": [0.1, 0.2, ...]}'); + +-- Personalized PageRank with vector similarity +SELECT * FROM ruvector_cypher('social_network', $$ + CALL ruvector.pagerank('Person', 'KNOWS', { + dampingFactor: 0.85, + iterations: 20, + personalizedOn: $seed_embedding + }) + YIELD node, score + RETURN node.name, score + ORDER BY score DESC + LIMIT 20 +$$, params := '{"seed_embedding": [0.1, 0.2, ...]}'); +``` + +### Path Finding + +```sql +-- Shortest path +SELECT * FROM ruvector_cypher('social_network', $$ + MATCH p = shortestPath((a:Person {name: 'Alice'})-[:KNOWS*1..6]-(b:Person {name: 'Bob'})) + RETURN p, length(p) +$$); + +-- All shortest paths +SELECT * FROM ruvector_cypher('social_network', $$ + MATCH p = allShortestPaths((a:Person {name: 'Alice'})-[:KNOWS*1..6]-(b:Person {name: 'Bob'})) + RETURN p, length(p) +$$); + +-- Vector-guided path (minimize embedding distance along path) +SELECT * FROM ruvector_cypher('social_network', $$ + MATCH p = ruvector.vectorPath( + (a:Person {name: 'Alice'}), + (b:Person {name: 'Bob'}), + 'KNOWS', + { + maxHops: 6, + vectorProperty: 'embedding', + optimization: 'minTotalDistance' + } + ) + RETURN p, ruvector.pathEmbeddingDistance(p) AS distance +$$); +``` + +### Graph Algorithms + +```sql +-- Community detection (Louvain) +SELECT * FROM ruvector_cypher('social_network', $$ + CALL ruvector.louvain('Person', 'KNOWS', {resolution: 1.0}) + YIELD node, communityId + RETURN node.name, communityId +$$); + +-- Node similarity 
(Jaccard) +SELECT * FROM ruvector_cypher('social_network', $$ + CALL ruvector.nodeSimilarity('Person', 'KNOWS', { + similarityCutoff: 0.5, + topK: 10 + }) + YIELD node1, node2, similarity + RETURN node1.name, node2.name, similarity +$$); + +-- Centrality measures +SELECT * FROM ruvector_cypher('social_network', $$ + CALL ruvector.betweenness('Person', 'KNOWS') + YIELD node, score + RETURN node.name, score + ORDER BY score DESC + LIMIT 10 +$$); +``` + +## Implementation Phases + +### Phase 1: Cypher Parser (Week 1-3) + +```rust +// src/graph/cypher/parser.rs + +use pest::Parser; +use pest_derive::Parser; + +#[derive(Parser)] +#[grammar = "graph/cypher/cypher.pest"] +pub struct CypherParser; + +/// Parse Cypher query string into AST +pub fn parse_cypher(query: &str) -> Result { + let pairs = CypherParser::parse(Rule::query, query)?; + + let mut builder = AstBuilder::new(); + for pair in pairs { + builder.process(pair)?; + } + + Ok(builder.build()) +} + +// src/graph/cypher/ast.rs + +#[derive(Debug, Clone)] +pub enum CypherQuery { + Match(MatchClause), + Create(CreateClause), + Merge(MergeClause), + Delete(DeleteClause), + Return(ReturnClause), + With(WithClause), + Compound(Vec), +} + +#[derive(Debug, Clone)] +pub struct MatchClause { + pub patterns: Vec, + pub where_clause: Option, + pub optional: bool, +} + +#[derive(Debug, Clone)] +pub struct Pattern { + pub nodes: Vec, + pub relationships: Vec, +} + +#[derive(Debug, Clone)] +pub struct NodePattern { + pub variable: Option, + pub labels: Vec, + pub properties: Option, +} + +#[derive(Debug, Clone)] +pub struct RelationshipPattern { + pub variable: Option, + pub types: Vec, + pub properties: Option, + pub direction: Direction, + pub length: RelationshipLength, +} + +#[derive(Debug, Clone)] +pub enum RelationshipLength { + Exactly(usize), + Range(Option, Option), // *1..3 + Any, // * +} +``` + +### Phase 2: Query Planner (Week 4-5) + +```rust +// src/graph/cypher/planner.rs + +pub struct QueryPlanner { + graph_store: 
Arc, + statistics: Arc, +} + +impl QueryPlanner { + pub fn plan(&self, query: &CypherQuery) -> Result { + let logical_plan = self.to_logical(query)?; + let optimized = self.optimize(logical_plan)?; + let physical_plan = self.to_physical(optimized)?; + + Ok(physical_plan) + } + + fn to_logical(&self, query: &CypherQuery) -> Result { + match query { + CypherQuery::Match(m) => self.plan_match(m), + CypherQuery::Create(c) => self.plan_create(c), + CypherQuery::Return(r) => self.plan_return(r), + // ... + } + } + + fn plan_match(&self, match_clause: &MatchClause) -> Result { + let mut plan = LogicalPlan::Scan; + + for pattern in &match_clause.patterns { + // Choose optimal starting point based on selectivity + let start_node = self.choose_start_node(pattern); + + // Build expand operations + for rel in &pattern.relationships { + plan = LogicalPlan::Expand { + input: Box::new(plan), + relationship: rel.clone(), + direction: rel.direction, + }; + } + } + + // Add filter for WHERE clause + if let Some(where_clause) = &match_clause.where_clause { + plan = LogicalPlan::Filter { + input: Box::new(plan), + predicate: where_clause.predicate.clone(), + }; + } + + Ok(plan) + } + + fn optimize(&self, plan: LogicalPlan) -> Result { + let mut optimized = plan; + + // Push down filters + optimized = self.push_down_filters(optimized); + + // Reorder joins based on selectivity + optimized = self.reorder_joins(optimized); + + // Use indexes where available + optimized = self.apply_indexes(optimized); + + Ok(optimized) + } +} + +#[derive(Debug)] +pub enum LogicalPlan { + Scan, + NodeByLabel { label: String }, + NodeById { ids: Vec }, + Expand { + input: Box, + relationship: RelationshipPattern, + direction: Direction, + }, + Filter { + input: Box, + predicate: Expression, + }, + Project { + input: Box, + expressions: Vec<(String, Expression)>, + }, + VectorSearch { + label: String, + property: String, + query: Vec, + k: usize, + }, + // ... 
+} +``` + +### Phase 3: Query Executor (Week 6-8) + +```rust +// src/graph/cypher/executor.rs + +pub struct QueryExecutor { + graph_store: Arc, +} + +impl QueryExecutor { + pub fn execute(&self, plan: &QueryPlan) -> Result { + match plan { + QueryPlan::Scan { label } => self.scan_nodes(label), + QueryPlan::Expand { input, rel, dir } => { + let source_rows = self.execute(input)?; + self.expand_relationships(&source_rows, rel, dir) + } + QueryPlan::Filter { input, predicate } => { + let rows = self.execute(input)?; + self.filter_rows(&rows, predicate) + } + QueryPlan::VectorSearch { label, property, query, k } => { + self.vector_search(label, property, query, *k) + } + QueryPlan::ShortestPath { start, end, rel_types, max_hops } => { + self.find_shortest_path(start, end, rel_types, *max_hops) + } + // ... + } + } + + fn expand_relationships( + &self, + source_rows: &QueryResult, + rel_pattern: &RelationshipPattern, + direction: &Direction, + ) -> Result { + let mut result_rows = Vec::new(); + + for row in source_rows.rows() { + let node_id = row.get_node_id()?; + + let edges = match direction { + Direction::Outgoing => self.graph_store.outgoing_edges(node_id, &rel_pattern.types), + Direction::Incoming => self.graph_store.incoming_edges(node_id, &rel_pattern.types), + Direction::Both => self.graph_store.all_edges(node_id, &rel_pattern.types), + }; + + for edge in edges { + let target = match direction { + Direction::Outgoing => edge.target, + Direction::Incoming => edge.source, + Direction::Both => if edge.source == node_id { edge.target } else { edge.source }, + }; + + let target_node = self.graph_store.get_node(target)?; + + // Check relationship properties + if let Some(props) = &rel_pattern.properties { + if !self.matches_properties(&edge.properties, props) { + continue; + } + } + + let mut new_row = row.clone(); + if let Some(var) = &rel_pattern.variable { + new_row.set(var, Value::Relationship(edge.clone())); + } + new_row.extend_with_node(target_node); + + 
result_rows.push(new_row); + } + } + + Ok(QueryResult::from_rows(result_rows)) + } + + fn vector_search( + &self, + label: &str, + property: &str, + query: &[f32], + k: usize, + ) -> Result { + // Use HNSW index for vector search + let index = self.graph_store.get_vector_index(label, property)?; + let results = index.search(query, k); + + let mut rows = Vec::with_capacity(k); + for (node_id, score) in results { + let node = self.graph_store.get_node(node_id)?; + let mut row = Row::new(); + row.set("node", Value::Node(node)); + row.set("score", Value::Float(score)); + rows.push(row); + } + + Ok(QueryResult::from_rows(rows)) + } +} +``` + +### Phase 4: Graph Storage (Week 9-10) + +```rust +// src/graph/storage/nodes.rs + +use dashmap::DashMap; +use parking_lot::RwLock; + +/// Node storage with label-based indexing +pub struct NodeStore { + /// node_id -> node data + nodes: DashMap, + /// label -> set of node_ids + label_index: DashMap>, + /// (label, property) -> property index + property_indexes: DashMap<(String, String), PropertyIndex>, + /// (label, property) -> vector index + vector_indexes: DashMap<(String, String), HnswIndex>, + /// Next node ID + next_id: AtomicU64, +} + +#[derive(Debug, Clone)] +pub struct Node { + pub id: u64, + pub labels: Vec, + pub properties: Properties, +} + +impl NodeStore { + pub fn create_node(&self, labels: Vec, properties: Properties) -> u64 { + let id = self.next_id.fetch_add(1, Ordering::SeqCst); + + let node = Node { id, labels: labels.clone(), properties: properties.clone() }; + + // Add to main store + self.nodes.insert(id, node); + + // Update label indexes + for label in &labels { + self.label_index + .entry(label.clone()) + .or_insert_with(HashSet::new) + .insert(id); + } + + // Update property indexes + for (key, value) in &properties { + for label in &labels { + if let Some(idx) = self.property_indexes.get(&(label.clone(), key.clone())) { + idx.insert(value.clone(), id); + } + } + } + + // Update vector indexes + for 
(key, value) in &properties { + if let Value::Vector(vec) = value { + for label in &labels { + if let Some(idx) = self.vector_indexes.get(&(label.clone(), key.clone())) { + idx.insert(id, vec); + } + } + } + } + + id + } + + pub fn nodes_by_label(&self, label: &str) -> Vec<&Node> { + self.label_index + .get(label) + .map(|ids| { + ids.iter() + .filter_map(|id| self.nodes.get(id).map(|n| n.value())) + .collect() + }) + .unwrap_or_default() + } +} + +// src/graph/storage/edges.rs + +/// Edge storage with adjacency lists +pub struct EdgeStore { + /// edge_id -> edge data + edges: DashMap, + /// node_id -> outgoing edges + outgoing: DashMap>, + /// node_id -> incoming edges + incoming: DashMap>, + /// edge_type -> set of edge_ids + type_index: DashMap>, + /// Next edge ID + next_id: AtomicU64, +} + +#[derive(Debug, Clone)] +pub struct Edge { + pub id: u64, + pub source: u64, + pub target: u64, + pub edge_type: String, + pub properties: Properties, +} + +impl EdgeStore { + pub fn create_edge( + &self, + source: u64, + target: u64, + edge_type: String, + properties: Properties, + ) -> u64 { + let id = self.next_id.fetch_add(1, Ordering::SeqCst); + + let edge = Edge { + id, + source, + target, + edge_type: edge_type.clone(), + properties, + }; + + // Add to main store + self.edges.insert(id, edge); + + // Update adjacency lists + self.outgoing.entry(source).or_insert_with(Vec::new).push(id); + self.incoming.entry(target).or_insert_with(Vec::new).push(id); + + // Update type index + self.type_index + .entry(edge_type) + .or_insert_with(HashSet::new) + .insert(id); + + id + } + + pub fn outgoing_edges(&self, node_id: u64, types: &[String]) -> Vec<&Edge> { + self.outgoing + .get(&node_id) + .map(|edge_ids| { + edge_ids.iter() + .filter_map(|id| self.edges.get(id)) + .filter(|e| types.is_empty() || types.contains(&e.edge_type)) + .map(|e| e.value()) + .collect() + }) + .unwrap_or_default() + } +} +``` + +### Phase 5: Graph Algorithms (Week 11-12) + +```rust +// 
src/graph/traversal/shortest_path.rs + +use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque}; + +/// BFS-based shortest path (unweighted; follows outgoing edges only, at most `max_hops` hops) +pub fn shortest_path_bfs( + store: &GraphStore, + start: u64, + end: u64, + edge_types: &[String], + max_hops: usize, +) -> Option<Vec<u64>> { + let mut visited = HashSet::new(); + let mut queue = VecDeque::new(); + let mut parents: HashMap<u64, u64> = HashMap::new(); + + queue.push_back((start, 0)); + visited.insert(start); + + while let Some((node, depth)) = queue.pop_front() { + if node == end { + // Reconstruct path + return Some(reconstruct_path(&parents, start, end)); + } + + if depth >= max_hops { + continue; + } + + for edge in store.edges.outgoing_edges(node, edge_types) { + if !visited.contains(&edge.target) { + visited.insert(edge.target); + parents.insert(edge.target, node); + queue.push_back((edge.target, depth + 1)); + } + } + } + + None +} + +/// Dijkstra's algorithm for weighted shortest path (edges missing `weight_property` count as weight 1.0) +pub fn shortest_path_dijkstra( + store: &GraphStore, + start: u64, + end: u64, + edge_types: &[String], + weight_property: &str, +) -> Option<(Vec<u64>, f64)> { + let mut distances: HashMap<u64, f64> = HashMap::new(); + let mut parents: HashMap<u64, u64> = HashMap::new(); + let mut heap = BinaryHeap::new(); + + distances.insert(start, 0.0); + heap.push(Reverse((OrderedFloat(0.0), start))); + + while let Some(Reverse((OrderedFloat(dist), node))) = heap.pop() { + if node == end { + return Some((reconstruct_path(&parents, start, end), dist)); + } + + if dist > *distances.get(&node).unwrap_or(&f64::INFINITY) { + continue; + } + + for edge in store.edges.outgoing_edges(node, edge_types) { + let weight = edge.properties + .get(weight_property) + .and_then(|v| v.as_f64()) + .unwrap_or(1.0); + + let new_dist = dist + weight; + + if new_dist < *distances.get(&edge.target).unwrap_or(&f64::INFINITY) { + distances.insert(edge.target, new_dist); + parents.insert(edge.target, node); + heap.push(Reverse((OrderedFloat(new_dist), edge.target))); + } + } + } + + None +} + +/// Vector-guided path
finding +pub fn vector_guided_path( + store: &GraphStore, + start: u64, + end: u64, + edge_types: &[String], + vector_property: &str, + max_hops: usize, +) -> Option> { + let target_vec = store.nodes.get_node(end)? + .properties.get(vector_property)? + .as_vector()?; + + let mut heap = BinaryHeap::new(); + let mut visited = HashSet::new(); + let mut parents: HashMap = HashMap::new(); + + let start_vec = store.nodes.get_node(start)? + .properties.get(vector_property)? + .as_vector()?; + + let start_dist = cosine_distance(start_vec, target_vec); + heap.push(Reverse((OrderedFloat(start_dist), start, 0))); + + while let Some(Reverse((_, node, depth))) = heap.pop() { + if node == end { + return Some(reconstruct_path(&parents, start, end)); + } + + if visited.contains(&node) || depth >= max_hops { + continue; + } + visited.insert(node); + + for edge in store.edges.outgoing_edges(node, edge_types) { + if visited.contains(&edge.target) { + continue; + } + + if let Some(vec) = store.nodes.get_node(edge.target) + .and_then(|n| n.properties.get(vector_property)) + .and_then(|v| v.as_vector()) + { + let dist = cosine_distance(vec, target_vec); + parents.insert(edge.target, node); + heap.push(Reverse((OrderedFloat(dist), edge.target, depth + 1))); + } + } + } + + None +} +``` + +### Phase 6: PostgreSQL Integration (Week 13-14) + +```rust +// src/graph/operators.rs + +// Main Cypher execution function +#[pg_extern] +fn ruvector_cypher( + graph_name: &str, + query: &str, + params: default!(Option, "NULL"), +) -> TableIterator<'static, (name!(result, pgrx::JsonB),)> { + let graph = get_or_create_graph(graph_name); + + // Parse parameters + let parameters = params + .map(|p| serde_json::from_value(p.0).unwrap_or_default()) + .unwrap_or_default(); + + // Parse query + let ast = parse_cypher(query).expect("Failed to parse Cypher query"); + + // Plan query + let plan = QueryPlanner::new(&graph).plan(&ast).expect("Failed to plan query"); + + // Execute query + let result = 
QueryExecutor::new(&graph).execute(&plan).expect("Failed to execute query"); + + // Convert to table iterator + let rows: Vec<_> = result.rows() + .map(|row| (pgrx::JsonB(row.to_json()),)) + .collect(); + + TableIterator::new(rows) +} + +// Graph creation +#[pg_extern] +fn ruvector_create_graph(name: &str) -> bool { + GRAPH_STORE.create_graph(name).is_ok() +} + +// Node label creation +#[pg_extern] +fn ruvector_create_node_label( + graph_name: &str, + label: &str, + properties: pgrx::JsonB, +) -> bool { + let graph = get_graph(graph_name).expect("Graph not found"); + let schema: HashMap = serde_json::from_value(properties.0) + .expect("Invalid properties schema"); + + graph.create_label(label, schema).is_ok() +} + +// Edge type creation +#[pg_extern] +fn ruvector_create_edge_type( + graph_name: &str, + edge_type: &str, + properties: pgrx::JsonB, +) -> bool { + let graph = get_graph(graph_name).expect("Graph not found"); + let schema: HashMap = serde_json::from_value(properties.0) + .expect("Invalid properties schema"); + + graph.create_edge_type(edge_type, schema).is_ok() +} + +// Helper to get graph statistics +#[pg_extern] +fn ruvector_graph_stats(graph_name: &str) -> pgrx::JsonB { + let graph = get_graph(graph_name).expect("Graph not found"); + + pgrx::JsonB(serde_json::json!({ + "node_count": graph.node_count(), + "edge_count": graph.edge_count(), + "labels": graph.labels(), + "edge_types": graph.edge_types(), + "memory_mb": graph.memory_usage_mb(), + })) +} +``` + +## Supported Cypher Features + +### Clauses +- `MATCH` - Pattern matching +- `OPTIONAL MATCH` - Optional pattern matching +- `CREATE` - Create nodes/relationships +- `MERGE` - Match or create +- `DELETE` / `DETACH DELETE` - Delete nodes/relationships +- `SET` - Update properties +- `REMOVE` - Remove properties/labels +- `RETURN` - Return results +- `WITH` - Query chaining +- `WHERE` - Filtering +- `ORDER BY` - Sorting +- `SKIP` / `LIMIT` - Pagination +- `UNION` / `UNION ALL` - Combining results + 
+### Expressions +- Property access: `n.name` +- Labels: `n:Person` +- Relationship types: `[:KNOWS]` +- Variable length: `[:KNOWS*1..3]` +- List comprehensions: `[x IN list WHERE x > 5]` +- CASE expressions + +### Functions +- Aggregation: `count()`, `sum()`, `avg()`, `min()`, `max()`, `collect()` +- String: `toUpper()`, `toLower()`, `trim()`, `split()` +- Math: `abs()`, `ceil()`, `floor()`, `round()`, `sqrt()` +- List: `head()`, `tail()`, `size()`, `range()` +- Path: `length()`, `nodes()`, `relationships()` +- **RuVector-specific**: + - `ruvector.similarity(embedding1, embedding2)` + - `ruvector.distance(embedding1, embedding2, metric)` + - `ruvector.knn(embedding, k)` + +## Benchmarks + +| Operation | Nodes | Edges | Time (ms) | +|-----------|-------|-------|-----------| +| Simple MATCH | 100K | 1M | 2.5 | +| 2-hop traversal | 100K | 1M | 15 | +| Shortest path (BFS) | 100K | 1M | 8 | +| Vector-guided path | 100K | 1M | 25 | +| PageRank (20 iter) | 100K | 1M | 450 | +| Community detection | 100K | 1M | 1200 | + +## Dependencies + +```toml +[dependencies] +# Link to ruvector-graph +ruvector-graph = { path = "../ruvector-graph", optional = true } + +# Parser +pest = "2.7" +pest_derive = "2.7" + +# Concurrent collections +dashmap = "6.0" +parking_lot = "0.12" + +# Graph algorithms +petgraph = { version = "0.6", optional = true } +``` + +## Feature Flags + +```toml +[features] +graph = [] +graph-cypher = ["graph", "pest", "pest_derive"] +graph-algorithms = ["graph", "petgraph"] +graph-vector = ["graph", "index-hnsw"] +graph-all = ["graph-cypher", "graph-algorithms", "graph-vector"] +``` diff --git a/crates/ruvector-postgres/docs/integration-plans/07-tiny-dancer-routing.md b/crates/ruvector-postgres/docs/integration-plans/07-tiny-dancer-routing.md new file mode 100644 index 00000000..55e01f40 --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/07-tiny-dancer-routing.md @@ -0,0 +1,985 @@ +# Tiny Dancer Routing Integration Plan + +## Overview + 
+Integrate AI agent routing capabilities from `ruvector-tiny-dancer` into PostgreSQL, enabling intelligent request routing, model selection, and cost optimization directly in SQL. + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PostgreSQL Extension │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Tiny Dancer Router │ │ +│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │ +│ │ │ FastGRNN │ │ Route │ │ Cost │ │ │ +│ │ │ Inference │ │ Classifier │ │ Optimizer │ │ │ +│ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ │ +│ └─────────┼─────────────────┼─────────────────┼───────────┘ │ +│ └─────────────────┴─────────────────┘ │ +│ ▼ │ +│ ┌───────────────────────────┐ │ +│ │ Agent Registry & Pool │ │ +│ │ (LLMs, Tools, APIs) │ │ +│ └───────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Module Structure + +``` +src/ +├── routing/ +│ ├── mod.rs # Module exports +│ ├── fastgrnn.rs # FastGRNN neural inference +│ ├── router.rs # Main routing engine +│ ├── classifier.rs # Route classification +│ ├── cost_optimizer.rs # Cost/latency optimization +│ ├── agents/ +│ │ ├── registry.rs # Agent registration +│ │ ├── pool.rs # Agent pool management +│ │ └── capabilities.rs # Capability matching +│ ├── policies/ +│ │ ├── cost.rs # Cost-based routing +│ │ ├── latency.rs # Latency-based routing +│ │ ├── quality.rs # Quality-based routing +│ │ └── hybrid.rs # Multi-objective routing +│ └── operators.rs # SQL operators +``` + +## SQL Interface + +### Agent Registration + +```sql +-- Register AI agents/models +SELECT ruvector_register_agent( + name := 'gpt-4', + agent_type := 'llm', + capabilities := ARRAY['reasoning', 'code', 'analysis', 'creative'], + cost_per_1k_tokens := 0.03, + avg_latency_ms := 2500, + quality_score := 0.95, + metadata := '{"provider": "openai", 
"context_window": 128000}' +); + +SELECT ruvector_register_agent( + name := 'claude-3-haiku', + agent_type := 'llm', + capabilities := ARRAY['fast-response', 'simple-tasks', 'classification'], + cost_per_1k_tokens := 0.00025, + avg_latency_ms := 400, + quality_score := 0.80, + metadata := '{"provider": "anthropic", "context_window": 200000}' +); + +SELECT ruvector_register_agent( + name := 'code-specialist', + agent_type := 'tool', + capabilities := ARRAY['code-execution', 'debugging', 'testing'], + cost_per_call := 0.001, + avg_latency_ms := 100, + quality_score := 0.90 +); + +-- List registered agents +SELECT * FROM ruvector_list_agents(); +``` + +### Basic Routing + +```sql +-- Route a request to the best agent +SELECT * FROM ruvector_route( + request := 'Write a Python function to calculate Fibonacci numbers', + optimize_for := 'cost' -- or 'latency', 'quality', 'balanced' +); + +-- Result: +-- | agent_name | confidence | estimated_cost | estimated_latency | +-- |------------|------------|----------------|-------------------| +-- | claude-3-haiku | 0.85 | 0.001 | 400ms | + +-- Route with constraints +SELECT * FROM ruvector_route( + request := 'Analyze this complex legal document', + required_capabilities := ARRAY['reasoning', 'analysis'], + max_cost := 0.10, + max_latency_ms := 5000, + min_quality := 0.90 +); + +-- Multi-agent routing (for complex tasks) +SELECT * FROM ruvector_route_multi( + request := 'Build and deploy a web application', + num_agents := 3, + strategy := 'pipeline' -- or 'parallel', 'ensemble' +); +``` + +### Semantic Routing + +```sql +-- Create semantic routes (like function calling) +SELECT ruvector_create_route( + name := 'customer_support', + description := 'Handle customer support inquiries, complaints, and feedback', + embedding := ruvector_embed('Customer support and help requests'), + target_agent := 'support-agent', + priority := 1 +); + +SELECT ruvector_create_route( + name := 'technical_docs', + description := 'Answer questions 
about technical documentation and APIs', + embedding := ruvector_embed('Technical documentation and API reference'), + target_agent := 'docs-agent', + priority := 2 +); + +-- Semantic route matching +SELECT * FROM ruvector_semantic_route( + query := 'How do I reset my password?', + top_k := 3 +); + +-- Result: +-- | route_name | similarity | target_agent | confidence | +-- |------------|------------|--------------|------------| +-- | customer_support | 0.92 | support-agent | 0.95 | +``` + +### Cost Optimization + +```sql +-- Analyze routing costs +SELECT * FROM ruvector_routing_analytics( + time_range := '7 days', + group_by := 'agent' +); + +-- Result: +-- | agent | total_requests | total_cost | avg_latency | success_rate | +-- |-------|----------------|------------|-------------|--------------| +-- | gpt-4 | 1000 | $30.00 | 2.5s | 99.2% | +-- | haiku | 5000 | $1.25 | 0.4s | 98.5% | + +-- Optimize budget allocation +SELECT * FROM ruvector_optimize_budget( + monthly_budget := 100.00, + quality_threshold := 0.85, + latency_threshold_ms := 2000 +); + +-- Auto-route with budget awareness +SELECT * FROM ruvector_route( + request := 'Summarize this article', + budget_remaining := 10.00, + optimize_for := 'quality_per_dollar' +); +``` + +### Batch Routing + +```sql +-- Route multiple requests efficiently +SELECT * FROM ruvector_batch_route( + requests := ARRAY[ + 'Simple question 1', + 'Complex analysis task', + 'Code generation request' + ], + optimize_for := 'total_cost' +); + +-- Classify requests in batch (for preprocessing) +SELECT request_id, ruvector_classify_request(content) AS classification +FROM pending_requests; +``` + +## Implementation Phases + +### Phase 1: FastGRNN Core (Week 1-3) + +```rust +// src/routing/fastgrnn.rs + +use simsimd::SpatialSimilarity; + +/// FastGRNN (Fast Gated Recurrent Neural Network) +/// Lightweight neural network for fast inference +pub struct FastGRNN { + // Gate weights + w_gate: Vec, // [hidden, input] + u_gate: Vec, // 
[hidden, hidden] + b_gate: Vec<f32>, // [hidden] + + // Update weights + w_update: Vec<f32>, // [hidden, input] + u_update: Vec<f32>, // [hidden, hidden] + b_update: Vec<f32>, // [hidden] + + // Hyperparameters + zeta: f32, // Gate sparsity + nu: f32, // Update sparsity + + input_dim: usize, + hidden_dim: usize, +} + +impl FastGRNN { + pub fn new(input_dim: usize, hidden_dim: usize) -> Self { + Self { + w_gate: Self::init_weights(hidden_dim, input_dim), + u_gate: Self::init_weights(hidden_dim, hidden_dim), + b_gate: vec![0.0; hidden_dim], + w_update: Self::init_weights(hidden_dim, input_dim), + u_update: Self::init_weights(hidden_dim, hidden_dim), + b_update: vec![0.0; hidden_dim], + zeta: 1.0, + nu: 1.0, + input_dim, + hidden_dim, + } + } + + /// Single step forward pass + /// h_t = (ζ * (1 - z_t) + ν) ⊙ tanh(Wx_t + Uh_{t-1} + b_h) + z_t ⊙ h_{t-1} + pub fn step(&self, input: &[f32], hidden: &[f32]) -> Vec<f32> { + // Gate: z = σ(W_z x + U_z h + b_z) + let gate = self.sigmoid(&self.linear_combine( + input, hidden, + &self.w_gate, &self.u_gate, &self.b_gate + )); + + // Update: h̃ = tanh(W_h x + U_h h + b_h) + let update = self.tanh(&self.linear_combine( + input, hidden, + &self.w_update, &self.u_update, &self.b_update + )); + + // New hidden: h = (ζ(1-z) + ν) ⊙ h̃ + z ⊙ h + let mut new_hidden = vec![0.0; self.hidden_dim]; + for i in 0..self.hidden_dim { + let gate_factor = self.zeta * (1.0 - gate[i]) + self.nu; + new_hidden[i] = gate_factor * update[i] + gate[i] * hidden[i]; + } + + new_hidden + } + + /// Process sequence + pub fn forward(&self, sequence: &[Vec<f32>]) -> Vec<f32> { + let mut hidden = vec![0.0; self.hidden_dim]; + + for input in sequence { + hidden = self.step(input, &hidden); + } + + hidden + } + + /// Process single input (common case for routing) + pub fn forward_single(&self, input: &[f32]) -> Vec<f32> { + let hidden = vec![0.0; self.hidden_dim]; + self.step(input, &hidden) + } + + #[inline] + fn linear_combine( + &self, + input: &[f32], + hidden: &[f32], + w: &[f32], + u: &[f32], + 
b: &[f32], + ) -> Vec<f32> { + let mut result = b.to_vec(); + + // W @ x + for i in 0..self.hidden_dim { + for j in 0..self.input_dim { + result[i] += w[i * self.input_dim + j] * input[j]; + } + } + + // U @ h + for i in 0..self.hidden_dim { + for j in 0..self.hidden_dim { + result[i] += u[i * self.hidden_dim + j] * hidden[j]; + } + } + + result + } + + #[inline] + fn sigmoid(&self, x: &[f32]) -> Vec<f32> { + x.iter().map(|&v| 1.0 / (1.0 + (-v).exp())).collect() + } + + #[inline] + fn tanh(&self, x: &[f32]) -> Vec<f32> { + x.iter().map(|&v| v.tanh()).collect() + } +} +``` + +### Phase 2: Route Classifier (Week 4-5) + +```rust +// src/routing/classifier.rs + +/// Route classifier using FastGRNN + linear head +pub struct RouteClassifier { + fastgrnn: FastGRNN, + classifier_head: Vec<f32>, // [num_classes, hidden_dim] + num_classes: usize, + class_names: Vec<String>, +} + +impl RouteClassifier { + /// Classify request to route category + pub fn classify(&self, embedding: &[f32]) -> Vec<(String, f32)> { + // FastGRNN encoding + let hidden = self.fastgrnn.forward_single(embedding); + + // Linear classifier + let mut logits = vec![0.0; self.num_classes]; + for i in 0..self.num_classes { + for j in 0..hidden.len() { + logits[i] += self.classifier_head[i * hidden.len() + j] * hidden[j]; + } + } + + // Softmax + let probs = softmax(&logits); + + // Return sorted by probability + let mut results: Vec<_> = self.class_names.iter() + .zip(probs.iter()) + .map(|(name, &prob)| (name.clone(), prob)) + .collect(); + + results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + results + } + + /// Multi-label classification (request may need multiple capabilities) + pub fn classify_capabilities(&self, embedding: &[f32]) -> Vec<(String, f32)> { + let hidden = self.fastgrnn.forward_single(embedding); + + // Sigmoid for multi-label + let mut results = Vec::new(); + for i in 0..self.num_classes { + let mut logit = 0.0; + for j in 0..hidden.len() { + logit += self.classifier_head[i * hidden.len() + j] * 
hidden[j]; + } + let prob = 1.0 / (1.0 + (-logit).exp()); + + if prob > 0.5 { + results.push((self.class_names[i].clone(), prob)); + } + } + + results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + results + } +} + +#[pg_extern] +fn ruvector_classify_request(request: &str) -> pgrx::JsonB { + let embedding = get_embedding(request); + let classifier = get_route_classifier(); + + let classifications = classifier.classify(&embedding); + + pgrx::JsonB(serde_json::json!({ + "classifications": classifications, + "top_category": classifications.first().map(|(name, _)| name), + "confidence": classifications.first().map(|(_, prob)| prob), + })) +} +``` + +### Phase 3: Agent Registry (Week 6-7) + +```rust +// src/routing/agents/registry.rs + +use dashmap::DashMap; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Agent { + pub name: String, + pub agent_type: AgentType, + pub capabilities: Vec, + pub capability_embedding: Vec, // Embedding of capabilities for semantic matching + pub cost_model: CostModel, + pub performance: AgentPerformance, + pub metadata: serde_json::Value, + pub active: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum AgentType { + LLM, + Tool, + API, + Human, + Ensemble, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CostModel { + pub cost_per_1k_tokens: Option, + pub cost_per_call: Option, + pub cost_per_second: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentPerformance { + pub avg_latency_ms: f64, + pub p99_latency_ms: f64, + pub quality_score: f64, + pub success_rate: f64, + pub total_requests: u64, +} + +/// Global agent registry +pub struct AgentRegistry { + agents: DashMap, + capability_index: HnswIndex, // For semantic capability matching +} + +impl AgentRegistry { + pub fn register(&self, agent: Agent) -> Result<(), RegistryError> { + // Index capability embedding + let embedding = &agent.capability_embedding; + self.capability_index.insert(&agent.name, 
embedding); + + self.agents.insert(agent.name.clone(), agent); + Ok(()) + } + + pub fn get(&self, name: &str) -> Option { + self.agents.get(name).map(|a| a.clone()) + } + + pub fn find_by_capability(&self, capability: &str, k: usize) -> Vec<&Agent> { + let embedding = get_embedding(capability); + let results = self.capability_index.search(&embedding, k); + + results.iter() + .filter_map(|(name, _)| self.agents.get(name.as_str()).map(|a| a.value())) + .collect() + } + + pub fn list_active(&self) -> Vec { + self.agents.iter() + .filter(|a| a.active) + .map(|a| a.clone()) + .collect() + } +} + +#[pg_extern] +fn ruvector_register_agent( + name: &str, + agent_type: &str, + capabilities: Vec, + cost_per_1k_tokens: default!(Option, "NULL"), + cost_per_call: default!(Option, "NULL"), + avg_latency_ms: f64, + quality_score: f64, + metadata: default!(Option, "NULL"), +) -> bool { + let registry = get_agent_registry(); + + // Create capability embedding + let capability_text = capabilities.join(", "); + let capability_embedding = get_embedding(&capability_text); + + let agent = Agent { + name: name.to_string(), + agent_type: agent_type.parse().unwrap_or(AgentType::LLM), + capabilities, + capability_embedding, + cost_model: CostModel { + cost_per_1k_tokens, + cost_per_call, + cost_per_second: None, + }, + performance: AgentPerformance { + avg_latency_ms, + p99_latency_ms: avg_latency_ms * 2.0, + quality_score, + success_rate: 1.0, + total_requests: 0, + }, + metadata: metadata.map(|m| m.0).unwrap_or(serde_json::json!({})), + active: true, + }; + + registry.register(agent).is_ok() +} +``` + +### Phase 4: Routing Engine (Week 8-9) + +```rust +// src/routing/router.rs + +pub struct Router { + registry: Arc, + classifier: Arc, + optimizer: Arc, + semantic_routes: Arc, +} + +#[derive(Debug, Clone)] +pub struct RoutingDecision { + pub agent: Agent, + pub confidence: f64, + pub estimated_cost: f64, + pub estimated_latency_ms: f64, + pub reasoning: String, +} + +#[derive(Debug, 
Clone)] +pub struct RoutingConstraints { + pub required_capabilities: Option>, + pub max_cost: Option, + pub max_latency_ms: Option, + pub min_quality: Option, + pub excluded_agents: Option>, +} + +impl Router { + /// Route request to best agent + pub fn route( + &self, + request: &str, + constraints: &RoutingConstraints, + optimize_for: OptimizationTarget, + ) -> Result { + let embedding = get_embedding(request); + + // Get candidate agents + let mut candidates = self.get_candidates(&embedding, constraints)?; + + if candidates.is_empty() { + return Err(RoutingError::NoSuitableAgent); + } + + // Score candidates + let scored: Vec<_> = candidates.iter() + .map(|agent| { + let score = self.score_agent(agent, &embedding, optimize_for); + (agent, score) + }) + .collect(); + + // Select best + let (best_agent, confidence) = scored.into_iter() + .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()) + .unwrap(); + + Ok(RoutingDecision { + agent: best_agent.clone(), + confidence, + estimated_cost: self.estimate_cost(best_agent, request), + estimated_latency_ms: best_agent.performance.avg_latency_ms, + reasoning: format!("Selected {} based on {:?} optimization", best_agent.name, optimize_for), + }) + } + + fn get_candidates( + &self, + embedding: &[f32], + constraints: &RoutingConstraints, + ) -> Result, RoutingError> { + let mut candidates: Vec<_> = self.registry.list_active(); + + // Filter by required capabilities + if let Some(required) = &constraints.required_capabilities { + candidates.retain(|a| { + required.iter().all(|cap| a.capabilities.contains(cap)) + }); + } + + // Filter by cost + if let Some(max_cost) = constraints.max_cost { + candidates.retain(|a| { + a.cost_model.cost_per_1k_tokens.unwrap_or(0.0) <= max_cost || + a.cost_model.cost_per_call.unwrap_or(0.0) <= max_cost + }); + } + + // Filter by latency + if let Some(max_latency) = constraints.max_latency_ms { + candidates.retain(|a| a.performance.avg_latency_ms <= max_latency); + } + + // Filter by quality + 
if let Some(min_quality) = constraints.min_quality { + candidates.retain(|a| a.performance.quality_score >= min_quality); + } + + // Filter excluded + if let Some(excluded) = &constraints.excluded_agents { + candidates.retain(|a| !excluded.contains(&a.name)); + } + + Ok(candidates) + } + + fn score_agent( + &self, + agent: &Agent, + request_embedding: &[f32], + optimize_for: OptimizationTarget, + ) -> f64 { + // Capability match score + let capability_sim = cosine_similarity(request_embedding, &agent.capability_embedding); + + match optimize_for { + OptimizationTarget::Cost => { + let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01); + capability_sim * (1.0 / (1.0 + cost)) + } + OptimizationTarget::Latency => { + let latency_factor = 1.0 / (1.0 + agent.performance.avg_latency_ms / 1000.0); + capability_sim * latency_factor + } + OptimizationTarget::Quality => { + capability_sim * agent.performance.quality_score + } + OptimizationTarget::Balanced => { + let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01); + let cost_factor = 1.0 / (1.0 + cost); + let latency_factor = 1.0 / (1.0 + agent.performance.avg_latency_ms / 1000.0); + let quality = agent.performance.quality_score; + + capability_sim * (0.3 * cost_factor + 0.3 * latency_factor + 0.4 * quality) + } + OptimizationTarget::QualityPerDollar => { + let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01); + capability_sim * agent.performance.quality_score / (cost + 0.001) + } + } + } + + fn estimate_cost(&self, agent: &Agent, request: &str) -> f64 { + let estimated_tokens = (request.len() / 4) as f64; // Rough estimate + + if let Some(cost_per_1k) = agent.cost_model.cost_per_1k_tokens { + cost_per_1k * estimated_tokens / 1000.0 + } else if let Some(cost_per_call) = agent.cost_model.cost_per_call { + cost_per_call + } else { + 0.0 + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum OptimizationTarget { + Cost, + Latency, + Quality, + Balanced, + QualityPerDollar, +} + +#[pg_extern] +fn 
ruvector_route( + request: &str, + optimize_for: default!(&str, "'balanced'"), + required_capabilities: default!(Option>, "NULL"), + max_cost: default!(Option, "NULL"), + max_latency_ms: default!(Option, "NULL"), + min_quality: default!(Option, "NULL"), +) -> pgrx::JsonB { + let router = get_router(); + + let constraints = RoutingConstraints { + required_capabilities, + max_cost, + max_latency_ms, + min_quality, + excluded_agents: None, + }; + + let target = match optimize_for { + "cost" => OptimizationTarget::Cost, + "latency" => OptimizationTarget::Latency, + "quality" => OptimizationTarget::Quality, + "quality_per_dollar" => OptimizationTarget::QualityPerDollar, + _ => OptimizationTarget::Balanced, + }; + + match router.route(request, &constraints, target) { + Ok(decision) => pgrx::JsonB(serde_json::json!({ + "agent_name": decision.agent.name, + "confidence": decision.confidence, + "estimated_cost": decision.estimated_cost, + "estimated_latency_ms": decision.estimated_latency_ms, + "reasoning": decision.reasoning, + })), + Err(e) => pgrx::JsonB(serde_json::json!({ + "error": format!("{:?}", e), + })), + } +} +``` + +### Phase 5: Semantic Routes (Week 10-11) + +```rust +// src/routing/semantic_routes.rs + +pub struct SemanticRoutes { + routes: DashMap, + index: HnswIndex, +} + +#[derive(Debug, Clone)] +pub struct SemanticRoute { + pub name: String, + pub description: String, + pub embedding: Vec, + pub target_agent: String, + pub priority: i32, + pub conditions: Option, +} + +#[derive(Debug, Clone)] +pub struct RouteConditions { + pub time_range: Option<(chrono::NaiveTime, chrono::NaiveTime)>, + pub user_tier: Option>, + pub rate_limit: Option, +} + +impl SemanticRoutes { + pub fn add_route(&self, route: SemanticRoute) { + self.index.insert(&route.name, &route.embedding); + self.routes.insert(route.name.clone(), route); + } + + pub fn match_route(&self, query_embedding: &[f32], k: usize) -> Vec<(SemanticRoute, f32)> { + let results = 
self.index.search(query_embedding, k); + + results.iter() + .filter_map(|(name, score)| { + self.routes.get(name.as_str()) + .map(|r| (r.clone(), *score)) + }) + .collect() + } +} + +#[pg_extern] +fn ruvector_create_route( + name: &str, + description: &str, + target_agent: &str, + priority: default!(i32, 0), + embedding: default!(Option>, "NULL"), +) -> bool { + let routes = get_semantic_routes(); + + let embedding = embedding.unwrap_or_else(|| get_embedding(description)); + + let route = SemanticRoute { + name: name.to_string(), + description: description.to_string(), + embedding, + target_agent: target_agent.to_string(), + priority, + conditions: None, + }; + + routes.add_route(route); + true +} + +#[pg_extern] +fn ruvector_semantic_route( + query: &str, + top_k: default!(i32, 3), +) -> TableIterator<'static, ( + name!(route_name, String), + name!(similarity, f32), + name!(target_agent, String), + name!(confidence, f32), +)> { + let routes = get_semantic_routes(); + let embedding = get_embedding(query); + + let matches = routes.match_route(&embedding, top_k as usize); + + let results: Vec<_> = matches.into_iter() + .map(|(route, similarity)| { + let confidence = similarity * (route.priority as f32 + 1.0) / 10.0; + (route.name, similarity, route.target_agent, confidence.min(1.0)) + }) + .collect(); + + TableIterator::new(results) +} +``` + +### Phase 6: Cost Optimizer (Week 12) + +```rust +// src/routing/cost_optimizer.rs + +pub struct CostOptimizer { + budget_tracker: BudgetTracker, + usage_history: UsageHistory, +} + +#[derive(Debug, Clone)] +pub struct BudgetAllocation { + pub agent_budgets: HashMap, + pub total_budget: f64, + pub period: chrono::Duration, +} + +impl CostOptimizer { + /// Optimize budget allocation across agents + pub fn optimize_budget( + &self, + total_budget: f64, + quality_threshold: f64, + latency_threshold: f64, + period_days: i64, + ) -> BudgetAllocation { + let agents = get_agent_registry().list_active(); + let history = 
self.usage_history.get_period(period_days); + + // Calculate value score for each agent + let agent_values: HashMap<String, f64> = agents.iter() + .filter(|a| { + a.performance.quality_score >= quality_threshold && + a.performance.avg_latency_ms <= latency_threshold + }) + .map(|a| { + let historical_usage = history.get(&a.name).map(|h| h.request_count).unwrap_or(1); + let quality = a.performance.quality_score; + let cost_efficiency = 1.0 / (a.cost_model.cost_per_1k_tokens.unwrap_or(0.01) + 0.001); + + let value = quality * cost_efficiency * (historical_usage as f64).ln(); + (a.name.clone(), value) + }) + .collect(); + + // Allocate budget proportionally to value + let total_value: f64 = agent_values.values().sum(); + let agent_budgets: HashMap<String, f64> = agent_values.iter() + .map(|(name, value)| { + let allocation = (value / total_value) * total_budget; + (name.clone(), allocation) + }) + .collect(); + + BudgetAllocation { + agent_budgets, + total_budget, + period: chrono::Duration::days(period_days), + } + } + + /// Check if request fits within budget + pub fn check_budget(&self, agent: &str, estimated_cost: f64) -> bool { + self.budget_tracker.remaining(agent) >= estimated_cost + } + + /// Record usage + pub fn record_usage(&self, agent: &str, actual_cost: f64, success: bool, latency_ms: f64) { + self.budget_tracker.deduct(agent, actual_cost); + self.usage_history.record(agent, actual_cost, success, latency_ms); + } +} + +#[pg_extern] +fn ruvector_optimize_budget( + monthly_budget: f64, + quality_threshold: default!(f64, 0.8), + latency_threshold_ms: default!(f64, 5000.0), +) -> pgrx::JsonB { + let optimizer = get_cost_optimizer(); + + let allocation = optimizer.optimize_budget( + monthly_budget, + quality_threshold, + latency_threshold_ms, + 30, + ); + + pgrx::JsonB(serde_json::json!({ + "allocations": allocation.agent_budgets, + "total_budget": allocation.total_budget, + "period_days": 30, + })) +} + +#[pg_extern] +fn ruvector_routing_analytics( + time_range: default!(&str, "'7 
days'"), + group_by: default!(&str, "'agent'"), +) -> TableIterator<'static, ( + name!(agent, String), + name!(total_requests, i64), + name!(total_cost, f64), + name!(avg_latency_ms, f64), + name!(success_rate, f64), +)> { + let optimizer = get_cost_optimizer(); + let days = parse_time_range(time_range); + + let stats = optimizer.usage_history.aggregate(days, group_by); + + TableIterator::new(stats) +} +``` + +## Benchmarks + +| Operation | Input Size | Time (μs) | Memory | +|-----------|------------|-----------|--------| +| FastGRNN step | 768-dim | 45 | 1KB | +| Route classification | 768-dim | 120 | 4KB | +| Semantic route match (1K routes) | 768-dim | 250 | 8KB | +| Full routing decision | 768-dim | 500 | 16KB | + +## Dependencies + +```toml +[dependencies] +# Link to ruvector-tiny-dancer +ruvector-tiny-dancer-core = { path = "../ruvector-tiny-dancer-core", optional = true } + +# SIMD +simsimd = "5.9" + +# Time handling +chrono = "0.4" + +# Concurrent collections +dashmap = "6.0" +``` + +## Feature Flags + +```toml +[features] +routing = [] +routing-fastgrnn = ["routing"] +routing-semantic = ["routing", "index-hnsw"] +routing-optimizer = ["routing"] +routing-all = ["routing-fastgrnn", "routing-semantic", "routing-optimizer"] +``` diff --git a/crates/ruvector-postgres/docs/integration-plans/08-optimization-strategy.md b/crates/ruvector-postgres/docs/integration-plans/08-optimization-strategy.md new file mode 100644 index 00000000..20b7113a --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/08-optimization-strategy.md @@ -0,0 +1,666 @@ +# Optimization Strategy + +## Overview + +Comprehensive optimization strategies for ruvector-postgres covering SIMD acceleration, memory management, query optimization, and PostgreSQL-specific tuning. + +## SIMD Optimization + +### Architecture Detection & Dispatch + +```rust +// src/simd/dispatch.rs + +#[derive(Debug, Clone, Copy)] +pub enum SimdCapability { + AVX512, + AVX2, + NEON, + Scalar, +} + +lazy_static! 
{ + static ref SIMD_CAPABILITY: SimdCapability = detect_simd(); +} + +fn detect_simd() -> SimdCapability { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") { + return SimdCapability::AVX512; + } + if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") { + return SimdCapability::AVX2; + } + } + + #[cfg(target_arch = "aarch64")] + { + return SimdCapability::NEON; + } + + SimdCapability::Scalar +} + +/// Dispatch to optimal implementation +#[inline] +pub fn distance_dispatch(a: &[f32], b: &[f32], metric: DistanceMetric) -> f32 { + match *SIMD_CAPABILITY { + SimdCapability::AVX512 => distance_avx512(a, b, metric), + SimdCapability::AVX2 => distance_avx2(a, b, metric), + SimdCapability::NEON => distance_neon(a, b, metric), + SimdCapability::Scalar => distance_scalar(a, b, metric), + } +} +``` + +### Vectorized Operations + +```rust +// AVX-512 optimized distance +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "avx512f", enable = "avx512vl")] +unsafe fn euclidean_avx512(a: &[f32], b: &[f32]) -> f32 { + use std::arch::x86_64::*; + + let mut sum = _mm512_setzero_ps(); + let chunks = a.len() / 16; + + for i in 0..chunks { + let va = _mm512_loadu_ps(a.as_ptr().add(i * 16)); + let vb = _mm512_loadu_ps(b.as_ptr().add(i * 16)); + let diff = _mm512_sub_ps(va, vb); + sum = _mm512_fmadd_ps(diff, diff, sum); + } + + // Handle remainder + let mut result = _mm512_reduce_add_ps(sum); + for i in (chunks * 16)..a.len() { + let diff = a[i] - b[i]; + result += diff * diff; + } + + result.sqrt() +} + +// ARM NEON optimized distance +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +unsafe fn euclidean_neon(a: &[f32], b: &[f32]) -> f32 { + use std::arch::aarch64::*; + + let mut sum = vdupq_n_f32(0.0); + let chunks = a.len() / 4; + + for i in 0..chunks { + let va = vld1q_f32(a.as_ptr().add(i * 4)); + let vb = vld1q_f32(b.as_ptr().add(i * 4)); + let diff = vsubq_f32(va, vb); + 
sum = vfmaq_f32(sum, diff, diff); + } + + let sum_array: [f32; 4] = std::mem::transmute(sum); + let mut result: f32 = sum_array.iter().sum(); + + for i in (chunks * 4)..a.len() { + let diff = a[i] - b[i]; + result += diff * diff; + } + + result.sqrt() +} +``` + +### Batch Processing + +```rust +/// Process multiple vectors in parallel batches +pub fn batch_distances( + query: &[f32], + candidates: &[&[f32]], + metric: DistanceMetric, +) -> Vec { + const BATCH_SIZE: usize = 256; + + candidates + .par_chunks(BATCH_SIZE) + .flat_map(|batch| { + batch.iter() + .map(|c| distance_dispatch(query, c, metric)) + .collect::>() + }) + .collect() +} + +/// Prefetch-optimized batch processing +pub fn batch_distances_prefetch( + query: &[f32], + candidates: &[Vec], + metric: DistanceMetric, +) -> Vec { + let mut results = Vec::with_capacity(candidates.len()); + + for i in 0..candidates.len() { + // Prefetch next vectors + if i + 4 < candidates.len() { + prefetch_read(&candidates[i + 4]); + } + + results.push(distance_dispatch(query, &candidates[i], metric)); + } + + results +} + +#[inline] +fn prefetch_read(data: &T) { + #[cfg(target_arch = "x86_64")] + unsafe { + std::arch::x86_64::_mm_prefetch( + data as *const T as *const i8, + std::arch::x86_64::_MM_HINT_T0, + ); + } +} +``` + +## Memory Optimization + +### Zero-Copy Operations + +```rust +/// Memory-mapped vector storage +pub struct MappedVectors { + mmap: memmap2::Mmap, + dim: usize, + count: usize, +} + +impl MappedVectors { + pub fn open(path: &Path, dim: usize) -> io::Result { + let file = File::open(path)?; + let mmap = unsafe { memmap2::Mmap::map(&file)? 
}; + let count = mmap.len() / (dim * std::mem::size_of::()); + + Ok(Self { mmap, dim, count }) + } + + /// Zero-copy access to vector + #[inline] + pub fn get(&self, index: usize) -> &[f32] { + let offset = index * self.dim; + let bytes = &self.mmap[offset * 4..(offset + self.dim) * 4]; + unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f32, self.dim) } + } +} + +/// PostgreSQL shared memory integration +pub struct SharedVectorCache { + shmem: pg_sys::dsm_segment, + vectors: *mut f32, + capacity: usize, + dim: usize, +} + +impl SharedVectorCache { + pub fn create(capacity: usize, dim: usize) -> Self { + let size = capacity * dim * std::mem::size_of::(); + let shmem = unsafe { pg_sys::dsm_create(size, 0) }; + let vectors = unsafe { pg_sys::dsm_segment_address(shmem) as *mut f32 }; + + Self { shmem, vectors, capacity, dim } + } + + #[inline] + pub fn get(&self, index: usize) -> &[f32] { + unsafe { + std::slice::from_raw_parts( + self.vectors.add(index * self.dim), + self.dim + ) + } + } +} +``` + +### Memory Pool + +```rust +/// Thread-local memory pool for temporary allocations +thread_local! 
{ + static VECTOR_POOL: RefCell = RefCell::new(VectorPool::new()); +} + +pub struct VectorPool { + pools: HashMap>>, + max_cached: usize, +} + +impl VectorPool { + pub fn new() -> Self { + Self { + pools: HashMap::new(), + max_cached: 1024, + } + } + + pub fn acquire(&mut self, dim: usize) -> Vec { + self.pools + .get_mut(&dim) + .and_then(|pool| pool.pop()) + .unwrap_or_else(|| vec![0.0; dim]) + } + + pub fn release(&mut self, mut vec: Vec) { + let dim = vec.len(); + let pool = self.pools.entry(dim).or_insert_with(Vec::new); + + if pool.len() < self.max_cached { + vec.iter_mut().for_each(|x| *x = 0.0); + pool.push(vec); + } + } +} + +/// RAII guard for pooled vectors +pub struct PooledVec(Vec); + +impl Drop for PooledVec { + fn drop(&mut self) { + VECTOR_POOL.with(|pool| { + pool.borrow_mut().release(std::mem::take(&mut self.0)); + }); + } +} +``` + +### Quantization for Memory Reduction + +```rust +/// 8-bit scalar quantization (4x memory reduction) +pub struct ScalarQuantized { + data: Vec, + scale: f32, + offset: f32, + dim: usize, +} + +impl ScalarQuantized { + pub fn from_f32(vectors: &[Vec]) -> Self { + let (min, max) = find_minmax(vectors); + let scale = (max - min) / 255.0; + let offset = min; + + let data: Vec = vectors.iter() + .flat_map(|v| { + v.iter().map(|&x| ((x - offset) / scale) as u8) + }) + .collect(); + + Self { data, scale, offset, dim: vectors[0].len() } + } + + #[inline] + pub fn distance(&self, query: &[f32], index: usize) -> f32 { + let start = index * self.dim; + let quantized = &self.data[start..start + self.dim]; + + let mut sum = 0.0f32; + for (i, &q) in quantized.iter().enumerate() { + let reconstructed = q as f32 * self.scale + self.offset; + let diff = query[i] - reconstructed; + sum += diff * diff; + } + sum.sqrt() + } +} + +/// Binary quantization (32x memory reduction) +pub struct BinaryQuantized { + data: BitVec, + dim: usize, +} + +impl BinaryQuantized { + pub fn from_f32(vectors: &[Vec]) -> Self { + let dim = vectors[0].len(); 
+ let mut data = BitVec::with_capacity(vectors.len() * dim); + + for vec in vectors { + for &x in vec { + data.push(x > 0.0); + } + } + + Self { data, dim } + } + + /// Hamming distance (extremely fast) + #[inline] + pub fn hamming_distance(&self, query_bits: &BitVec, index: usize) -> u32 { + let start = index * self.dim; + let doc_bits = &self.data[start..start + self.dim]; + + // XOR and popcount + doc_bits.iter() + .zip(query_bits.iter()) + .filter(|(a, b)| a != b) + .count() as u32 + } +} +``` + +## Query Optimization + +### Query Plan Caching + +```rust +/// Cache compiled query plans +pub struct QueryPlanCache { + cache: DashMap>, + max_size: usize, + hit_count: AtomicU64, + miss_count: AtomicU64, +} + +impl QueryPlanCache { + pub fn get_or_compile(&self, query_hash: u64, compile: F) -> Arc + where + F: FnOnce() -> QueryPlan, + { + if let Some(plan) = self.cache.get(&query_hash) { + self.hit_count.fetch_add(1, Ordering::Relaxed); + return plan.clone(); + } + + self.miss_count.fetch_add(1, Ordering::Relaxed); + let plan = Arc::new(compile()); + + // LRU eviction if needed + if self.cache.len() >= self.max_size { + self.evict_lru(); + } + + self.cache.insert(query_hash, plan.clone()); + plan + } +} +``` + +### Adaptive Index Selection + +```rust +/// Choose optimal index based on query characteristics +pub fn select_index( + query: &SearchQuery, + available_indexes: &[IndexInfo], + table_stats: &TableStats, +) -> &IndexInfo { + let selectivity = estimate_selectivity(query, table_stats); + let expected_results = (table_stats.row_count as f64 * selectivity) as usize; + + // Decision tree for index selection + if expected_results < 100 { + // Sequential scan may be faster for very small result sets + return &available_indexes.iter() + .find(|i| i.index_type == IndexType::BTree) + .unwrap_or(&available_indexes[0]); + } + + if query.has_vector_similarity() { + // Prefer HNSW for similarity search + if let Some(hnsw) = available_indexes.iter() + .find(|i| 
i.index_type == IndexType::Hnsw) + { + return hnsw; + } + } + + // Default to IVFFlat for range queries + available_indexes.iter() + .find(|i| i.index_type == IndexType::IvfFlat) + .unwrap_or(&available_indexes[0]) +} + +/// Adaptive ef_search based on query complexity +pub fn adaptive_ef_search( + query: &[f32], + index: &HnswIndex, + target_recall: f64, +) -> usize { + // Start with learned baseline + let baseline = index.learned_ef_for_query(query); + + // Adjust based on query density + let query_norm = query.iter().map(|x| x * x).sum::().sqrt(); + let density_factor = if query_norm < 1.0 { 1.2 } else { 1.0 }; + + // Adjust based on target recall + let recall_factor = match target_recall { + r if r >= 0.99 => 2.0, + r if r >= 0.95 => 1.5, + r if r >= 0.90 => 1.2, + _ => 1.0, + }; + + ((baseline as f64 * density_factor * recall_factor) as usize).max(10) +} +``` + +### Parallel Query Execution + +```rust +/// Parallel index scan +pub fn parallel_search( + query: &[f32], + index: &HnswIndex, + k: usize, + num_threads: usize, +) -> Vec<(u64, f32)> { + // Divide search into regions + let entry_points = index.get_diverse_entry_points(num_threads); + + let results: Vec<_> = entry_points + .into_par_iter() + .map(|entry| index.search_from(query, entry, k * 2)) + .collect(); + + // Merge results + let mut merged: Vec<_> = results.into_iter().flatten().collect(); + merged.sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()); + merged.dedup_by_key(|(id, _)| *id); + merged.truncate(k); + merged +} + +/// Intra-query parallelism for complex queries +pub fn parallel_filter_search( + query: &[f32], + filters: &[Filter], + index: &HnswIndex, + k: usize, +) -> Vec<(u64, f32)> { + // Stage 1: Parallel filter evaluation + let filter_results: Vec> = filters + .par_iter() + .map(|f| evaluate_filter(f)) + .collect(); + + // Stage 2: Intersect filter results + let valid_ids = filter_results + .into_iter() + .reduce(|a, b| a.intersection(&b).copied().collect()) + .unwrap_or_default(); 
+ + // Stage 3: Vector search with filter + index.search_with_filter(query, k, |id| valid_ids.contains(&id)) +} +``` + +## PostgreSQL-Specific Optimizations + +### Buffer Management + +```rust +/// Custom buffer pool for vector data +pub struct VectorBufferPool { + buffers: Vec, + free_list: Mutex>, + usage_count: Vec, +} + +impl VectorBufferPool { + /// Pin buffer with usage tracking + pub fn pin(&self, index: usize) -> PinnedBuffer { + self.usage_count[index].fetch_add(1, Ordering::Relaxed); + PinnedBuffer { pool: self, index } + } + + /// Clock sweep eviction + pub fn evict_if_needed(&self) -> Option { + let mut hand = 0; + loop { + let count = self.usage_count[hand].load(Ordering::Relaxed); + if count == 0 { + return Some(hand); + } + self.usage_count[hand].store(count - 1, Ordering::Relaxed); + hand = (hand + 1) % self.buffers.len(); + } + } +} +``` + +### WAL Optimization + +```rust +/// Batch WAL writes for bulk operations +pub fn bulk_insert_optimized( + vectors: &[Vec], + ids: &[u64], + batch_size: usize, +) { + // Group into batches + for batch in vectors.chunks(batch_size).zip(ids.chunks(batch_size)) { + // Single WAL record for batch + let wal_record = create_batch_wal_record(batch.0, batch.1); + + unsafe { + // Write single WAL entry + pg_sys::XLogInsert(RUVECTOR_RMGR_ID, XLOG_RUVECTOR_BATCH_INSERT); + } + + // Apply batch + apply_batch(batch.0, batch.1); + } +} +``` + +### Statistics Collection + +```rust +/// Collect statistics for query planner +pub fn analyze_vector_column( + table_oid: pg_sys::Oid, + column_num: i16, + sample_rows: &[pg_sys::HeapTuple], +) -> VectorStats { + let mut vectors: Vec> = Vec::new(); + + // Extract sample vectors + for tuple in sample_rows { + if let Some(vec) = extract_vector(tuple, column_num) { + vectors.push(vec); + } + } + + // Compute statistics + let dim = vectors[0].len(); + let centroid = compute_centroid(&vectors); + let avg_norm = vectors.iter() + .map(|v| v.iter().map(|x| x * x).sum::().sqrt()) + .sum::() / 
vectors.len() as f32; + + // Compute distribution statistics + let distances: Vec<f32> = vectors.iter() + .map(|v| euclidean_distance(v, &centroid)) + .collect(); + + VectorStats { + dim, + avg_norm, + centroid, + distance_histogram: compute_histogram(&distances, 100), + null_fraction: 0.0, // TODO: compute from sample + } +} +``` + +## Configuration Recommendations + +### GUC Parameters + +```sql +-- Memory settings +SET ruvector.shared_cache_size = '256MB'; +SET ruvector.work_mem = '64MB'; + +-- Parallelism +SET ruvector.max_parallel_workers = 4; +SET ruvector.parallel_search_threshold = 10000; + +-- Index tuning +SET ruvector.ef_search = 64; -- HNSW search quality +SET ruvector.probes = 10; -- IVFFlat probe count +SET ruvector.quantization = 'sq8'; -- Default quantization + +-- Learning +SET ruvector.learning_enabled = on; +SET ruvector.learning_rate = 0.01; + +-- Maintenance +SET ruvector.maintenance_work_mem = '512MB'; +SET ruvector.autovacuum_enabled = on; +``` + +### Hardware-Specific Tuning + +```yaml +# Intel Xeon (AVX-512) +ruvector.simd_mode: 'avx512' +ruvector.vector_batch_size: 256 +ruvector.prefetch_distance: 4 + +# AMD EPYC (AVX2) +ruvector.simd_mode: 'avx2' +ruvector.vector_batch_size: 128 +ruvector.prefetch_distance: 8 + +# Apple M1/M2 (NEON) +ruvector.simd_mode: 'neon' +ruvector.vector_batch_size: 64 +ruvector.prefetch_distance: 4 + +# Memory-constrained +ruvector.quantization: 'binary' +ruvector.shared_cache_size: '64MB' +ruvector.enable_mmap: on +``` + +## Performance Monitoring + +```sql +-- View SIMD statistics +SELECT * FROM ruvector_simd_stats(); + +-- Memory usage +SELECT * FROM ruvector_memory_stats(); + +-- Cache hit rates +SELECT * FROM ruvector_cache_stats(); + +-- Query performance +SELECT * FROM ruvector_query_stats() +ORDER BY total_time DESC +LIMIT 10; +``` diff --git a/crates/ruvector-postgres/docs/integration-plans/09-benchmarking-plan.md b/crates/ruvector-postgres/docs/integration-plans/09-benchmarking-plan.md new file mode 100644 index
00000000..b8ccf2bf --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/09-benchmarking-plan.md @@ -0,0 +1,694 @@ +# Benchmarking Plan + +## Overview + +Comprehensive benchmarking strategy for ruvector-postgres covering micro-benchmarks, integration tests, comparison with competitors, and production workload simulation. + +## Benchmark Categories + +### 1. Micro-Benchmarks + +Test individual operations in isolation. + +```rust +// benches/distance_bench.rs +use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId}; + +fn bench_euclidean_distance(c: &mut Criterion) { + let dims = [128, 256, 512, 768, 1024, 1536]; + + let mut group = c.benchmark_group("euclidean_distance"); + + for dim in dims { + let a: Vec = (0..dim).map(|_| rand::random()).collect(); + let b: Vec = (0..dim).map(|_| rand::random()).collect(); + + group.bench_with_input( + BenchmarkId::new("scalar", dim), + &dim, + |bench, _| bench.iter(|| euclidean_scalar(&a, &b)) + ); + + group.bench_with_input( + BenchmarkId::new("simd_auto", dim), + &dim, + |bench, _| bench.iter(|| euclidean_simd(&a, &b)) + ); + + #[cfg(target_arch = "x86_64")] + { + group.bench_with_input( + BenchmarkId::new("avx2", dim), + &dim, + |bench, _| bench.iter(|| unsafe { euclidean_avx2(&a, &b) }) + ); + + if is_x86_feature_detected!("avx512f") { + group.bench_with_input( + BenchmarkId::new("avx512", dim), + &dim, + |bench, _| bench.iter(|| unsafe { euclidean_avx512(&a, &b) }) + ); + } + } + } + + group.finish(); +} + +fn bench_cosine_distance(c: &mut Criterion) { + // Similar structure for cosine +} + +fn bench_dot_product(c: &mut Criterion) { + // Similar structure for dot product +} + +criterion_group!( + distance_benches, + bench_euclidean_distance, + bench_cosine_distance, + bench_dot_product +); +criterion_main!(distance_benches); +``` + +### Expected Results: Distance Functions + +| Operation | Dimension | Scalar (ns) | AVX2 (ns) | AVX-512 (ns) | Speedup | 
+|-----------|-----------|-------------|-----------|--------------|---------| +| Euclidean | 128 | 180 | 45 | 28 | 6.4x | +| Euclidean | 768 | 980 | 210 | 125 | 7.8x | +| Euclidean | 1536 | 1950 | 420 | 245 | 8.0x | +| Cosine | 128 | 240 | 62 | 38 | 6.3x | +| Cosine | 768 | 1280 | 285 | 168 | 7.6x | +| Dot Product | 768 | 450 | 95 | 58 | 7.8x | + +### 2. Index Benchmarks + +```rust +// benches/index_bench.rs + +fn bench_hnsw_build(c: &mut Criterion) { + let sizes = [10_000, 100_000, 1_000_000]; + let dims = [128, 768]; + + let mut group = c.benchmark_group("hnsw_build"); + group.sample_size(10); + group.measurement_time(Duration::from_secs(30)); + + for size in sizes { + for dim in dims { + let vectors = generate_random_vectors(size, dim); + + group.bench_with_input( + BenchmarkId::new(format!("{}d", dim), size), + &(&vectors, dim), + |bench, (vecs, _)| { + bench.iter(|| { + let mut index = HnswIndex::new(HnswConfig { + m: 16, + ef_construction: 200, + ..Default::default() + }); + for (i, v) in vecs.iter().enumerate() { + index.insert(i as u64, v); + } + }) + } + ); + } + } + + group.finish(); +} + +fn bench_hnsw_search(c: &mut Criterion) { + // Pre-build index + let index = build_hnsw_index(1_000_000, 768); + let queries = generate_random_vectors(1000, 768); + + let ef_values = [10, 50, 100, 200, 500]; + let k_values = [1, 10, 100]; + + let mut group = c.benchmark_group("hnsw_search"); + + for ef in ef_values { + for k in k_values { + group.bench_with_input( + BenchmarkId::new(format!("ef{}_k{}", ef, k), "1M"), + &(&index, &queries, ef, k), + |bench, (idx, qs, ef, k)| { + bench.iter(|| { + for q in qs.iter() { + idx.search(q, *k, *ef); + } + }) + } + ); + } + } + + group.finish(); +} + +fn bench_ivfflat_search(c: &mut Criterion) { + let index = build_ivfflat_index(1_000_000, 768, 1000); // 1000 lists + let queries = generate_random_vectors(1000, 768); + + let probe_values = [1, 5, 10, 20, 50]; + + let mut group = c.benchmark_group("ivfflat_search"); + + for probes 
in probe_values { + group.bench_with_input( + BenchmarkId::new(format!("probes{}", probes), "1M"), + &probes, + |bench, probes| { + bench.iter(|| { + for q in queries.iter() { + index.search(q, 10, *probes); + } + }) + } + ); + } + + group.finish(); +} +``` + +### Expected Results: Index Operations + +| Index | Size | Build Time | Memory | Search (p50) | Search (p99) | Recall@10 | +|-------|------|------------|--------|--------------|--------------|-----------| +| HNSW | 100K | 45s | 450MB | 0.8ms | 2.1ms | 0.98 | +| HNSW | 1M | 8min | 4.5GB | 1.2ms | 4.5ms | 0.97 | +| HNSW | 10M | 95min | 45GB | 2.1ms | 8.2ms | 0.96 | +| IVFFlat | 100K | 12s | 320MB | 1.5ms | 4.2ms | 0.92 | +| IVFFlat | 1M | 2min | 3.2GB | 3.2ms | 9.5ms | 0.91 | +| IVFFlat | 10M | 25min | 32GB | 8.5ms | 25ms | 0.89 | + +### 3. Quantization Benchmarks + +```rust +// benches/quantization_bench.rs + +fn bench_quantization_build(c: &mut Criterion) { + let vectors = generate_random_vectors(100_000, 768); + + let mut group = c.benchmark_group("quantization_build"); + + group.bench_function("scalar_q8", |bench| { + bench.iter(|| ScalarQuantized::from_f32(&vectors)) + }); + + group.bench_function("binary", |bench| { + bench.iter(|| BinaryQuantized::from_f32(&vectors)) + }); + + group.bench_function("product_q", |bench| { + bench.iter(|| ProductQuantized::from_f32(&vectors, 96, 256)) + }); + + group.finish(); +} + +fn bench_quantized_search(c: &mut Criterion) { + let vectors = generate_random_vectors(1_000_000, 768); + let query = generate_random_vectors(1, 768).pop().unwrap(); + + let sq8 = ScalarQuantized::from_f32(&vectors); + let binary = BinaryQuantized::from_f32(&vectors); + let pq = ProductQuantized::from_f32(&vectors, 96, 256); + + let mut group = c.benchmark_group("quantized_search_1M"); + + group.bench_function("full_precision", |bench| { + bench.iter(|| { + vectors.iter() + .enumerate() + .map(|(i, v)| (i, euclidean_distance(&query, v))) + .min_by(|a, b| a.1.partial_cmp(&b.1).unwrap()) + }) + 
}); + + group.bench_function("scalar_q8", |bench| { + bench.iter(|| { + (0..vectors.len()) + .map(|i| (i, sq8.distance(&query, i))) + .min_by(|a, b| a.1.partial_cmp(&b.1).unwrap()) + }) + }); + + group.bench_function("binary_hamming", |bench| { + let query_bits = binary.quantize_query(&query); + bench.iter(|| { + (0..vectors.len()) + .map(|i| (i, binary.hamming_distance(&query_bits, i))) + .min_by(|a, b| a.1.cmp(&b.1)) + }) + }); + + group.finish(); +} +``` + +### Expected Results: Quantization + +| Method | Memory (1M 768d) | Search Time | Recall Loss | +|--------|------------------|-------------|-------------| +| Full Precision | 3GB | 850ms | 0% | +| Scalar Q8 | 750MB | 420ms | 1-2% | +| Binary | 94MB | 95ms | 5-10% | +| Product Q | 200MB | 180ms | 2-4% | + +### 4. PostgreSQL Integration Benchmarks + +```sql +-- Test setup script +CREATE EXTENSION ruvector; + +-- Create test table +CREATE TABLE bench_vectors ( + id SERIAL PRIMARY KEY, + embedding vector(768), + category TEXT, + created_at TIMESTAMP DEFAULT NOW() +); + +-- Insert test data (768 random components per row) +INSERT INTO bench_vectors (embedding, category) +SELECT + array_agg(random())::vector(768), + 'category_' || (i % 100)::text +FROM generate_series(1, 1000000) i, generate_series(1, 768) d +GROUP BY i; + +-- Create indexes +CREATE INDEX ON bench_vectors USING hnsw (embedding vector_cosine_ops) +WITH (m = 16, ef_construction = 200); + +CREATE INDEX ON bench_vectors USING ivfflat (embedding vector_cosine_ops) +WITH (lists = 1000); + +-- Benchmark queries +\timing on + +-- Simple k-NN +EXPLAIN ANALYZE +SELECT id, embedding <=> '[...]'::vector AS distance +FROM bench_vectors +ORDER BY distance +LIMIT 10; + +-- k-NN with filter +EXPLAIN ANALYZE +SELECT id, embedding <=> '[...]'::vector AS distance +FROM bench_vectors +WHERE category = 'category_42' +ORDER BY distance +LIMIT 10; + +-- Batch search +EXPLAIN ANALYZE +SELECT b.id, q.query_id, + b.embedding <=> q.embedding AS distance +FROM bench_vectors b +CROSS JOIN ( + SELECT 1 AS query_id, '[...]'::vector AS
embedding + UNION ALL + SELECT 2, '[...]'::vector + -- ... more queries +) q +ORDER BY q.query_id, distance +LIMIT 100; +``` + +### 5. Competitor Comparison + +```python +# benchmark_comparison.py + +import time +import numpy as np +from typing import List, Tuple + +# Test data +SIZES = [10_000, 100_000, 1_000_000] +DIMS = [128, 768, 1536] +K = 10 +QUERIES = 1000 + +def run_pgvector_benchmark(conn, size, dim): + """Benchmark pgvector""" + # Setup + conn.execute(f""" + CREATE TABLE pgvector_test ( + id SERIAL PRIMARY KEY, + embedding vector({dim}) + ); + CREATE INDEX ON pgvector_test USING hnsw (embedding vector_cosine_ops); + """) + + # Insert + start = time.time() + # ... bulk insert + build_time = time.time() - start + + # Search + query = np.random.randn(dim).astype(np.float32) + start = time.time() + for _ in range(QUERIES): + conn.execute(f""" + SELECT id FROM pgvector_test + ORDER BY embedding <=> %s + LIMIT {K} + """, (query.tolist(),)) + search_time = (time.time() - start) / QUERIES * 1000 + + return { + 'build_time': build_time, + 'search_time_ms': search_time, + } + +def run_ruvector_benchmark(conn, size, dim): + """Benchmark ruvector-postgres""" + # Similar setup with ruvector + pass + +def run_pinecone_benchmark(index, size, dim): + """Benchmark Pinecone (cloud)""" + pass + +def run_qdrant_benchmark(client, size, dim): + """Benchmark Qdrant""" + pass + +def run_milvus_benchmark(collection, size, dim): + """Benchmark Milvus""" + pass + +# Run all benchmarks +results = {} +for size in SIZES: + for dim in DIMS: + results[(size, dim)] = { + 'pgvector': run_pgvector_benchmark(...), + 'ruvector': run_ruvector_benchmark(...), + 'qdrant': run_qdrant_benchmark(...), + 'milvus': run_milvus_benchmark(...), + } + +# Generate comparison report +``` + +### Expected Comparison Results + +| System | 1M Build | 1M Search (p50) | 1M Search (p99) | Memory | Recall@10 | +|--------|----------|-----------------|-----------------|--------|-----------| +| **ruvector-postgres** 
| **5min** | **0.9ms** | **3.2ms** | **4.2GB** | **0.97** | +| pgvector | 12min | 2.1ms | 8.5ms | 4.8GB | 0.95 | +| Qdrant | 7min | 1.2ms | 4.1ms | 4.5GB | 0.96 | +| Milvus | 8min | 1.5ms | 5.2ms | 5.1GB | 0.96 | +| Pinecone (P1) | 3min* | 5ms* | 15ms* | N/A | 0.98 | + +*Cloud latency includes network overhead + +### 6. Stress Testing + +```bash +#!/bin/bash +# stress_test.sh + +# Configuration +DURATION=3600 # 1 hour +CONCURRENCY=100 +QPS_TARGET=10000 + +# Start PostgreSQL with ruvector +pg_ctl start -D "$PGDATA" + +# Run pgbench-style workload in the background so monitoring overlaps it +pgbench -c $CONCURRENCY -j 10 -T $DURATION \ + -f stress_queries.sql \ + -P 10 \ + --rate=$QPS_TARGET \ + testdb & + +# Monitor during test (loop ends when the pgbench job exits) +while kill -0 "$!" 2>/dev/null; do + psql -c "SELECT * FROM ruvector_stats();" >> stats.log + psql -c "SELECT * FROM pg_stat_activity WHERE state = 'active';" >> activity.log + sleep 10 +done +``` + +### stress_queries.sql + +```sql +-- Mixed workload +\set query_type random(1, 100) + +\if :query_type <= 60 + -- 60% simple k-NN + SELECT id FROM vectors + ORDER BY embedding <=> :'random_vector'::vector + LIMIT 10; +\elif :query_type <= 80 + -- 20% filtered k-NN + SELECT id FROM vectors + WHERE category = :'random_category' + ORDER BY embedding <=> :'random_vector'::vector + LIMIT 10; +\elif :query_type <= 90 + -- 10% batch search + SELECT v.id, q.id as query_id + FROM vectors v, query_batch q + ORDER BY v.embedding <=> q.embedding + LIMIT 100; +\else + -- 10% insert + INSERT INTO vectors (embedding, category) + VALUES (:'random_vector'::vector, :'random_category'); +\endif +``` + +### 7.
Memory Benchmarks + +```rust +// benches/memory_bench.rs + +fn bench_memory_footprint(c: &mut Criterion) { + let sizes = [100_000, 1_000_000, 10_000_000]; + + println!("\n=== Memory Footprint Analysis ===\n"); + + for size in sizes { + println!("Size: {} vectors", size); + + // Full precision vectors + let vectors: Vec<Vec<f32>> = generate_random_vectors(size, 768); + let raw_size = size * 768 * 4; + println!(" Raw vectors: {} MB", raw_size / 1_000_000); + + // HNSW index + let mut hnsw = HnswIndex::new(HnswConfig::default()); + for (i, v) in vectors.iter().enumerate() { + hnsw.insert(i as u64, v); + } + println!(" HNSW overhead: {} MB", hnsw.memory_usage() / 1_000_000); + + // Quantized + let sq8 = ScalarQuantized::from_f32(&vectors); + println!(" SQ8 size: {} MB", sq8.memory_usage() / 1_000_000); + + let binary = BinaryQuantized::from_f32(&vectors); + println!(" Binary size: {} MB", binary.memory_usage() / 1_000_000); + + println!(); + } +} +``` + +### 8. Recall vs Latency Analysis + +```python +# recall_latency_analysis.py + +import matplotlib.pyplot as plt +import numpy as np + +def measure_recall_latency_tradeoff(index, queries, ground_truth, ef_values): + """Measure recall vs latency for different ef values""" + results = [] + + for ef in ef_values: + latencies = [] + recalls = [] + + for i, query in enumerate(queries): + start = time.time() + hits = index.search(query, k=10, ef=ef) + latency = (time.time() - start) * 1000 + + recall = len(set(hits) & set(ground_truth[i])) / 10 + + latencies.append(latency) + recalls.append(recall) + + results.append({ + 'ef': ef, + 'avg_latency': np.mean(latencies), + 'p99_latency': np.percentile(latencies, 99), + 'avg_recall': np.mean(recalls), + }) + + return results + +# Plot results +plt.figure(figsize=(10, 6)) +plt.plot([r['avg_latency'] for r in results], + [r['avg_recall'] for r in results], 'b-o') +plt.xlabel('Latency (ms)') +plt.ylabel('Recall@10') +plt.title('Recall vs Latency Tradeoff') +plt.savefig('recall_latency.png')
+``` + +## Benchmark Automation + +### CI/CD Integration + +```yaml +# .github/workflows/benchmark.yml +name: Benchmarks + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get install -y postgresql-16 + cargo install cargo-criterion + + - name: Run micro-benchmarks + run: | + cargo criterion --message-format=json > bench_results.json + + - name: Run PostgreSQL benchmarks + run: | + ./scripts/run_pg_benchmarks.sh + + - name: Compare with baseline + run: | + python scripts/compare_benchmarks.py \ + --baseline baseline.json \ + --current bench_results.json \ + --threshold 10 + + - name: Upload results + uses: actions/upload-artifact@v4 + with: + name: benchmark-results + path: bench_results.json +``` + +### Benchmark Dashboard + +```sql +-- Create benchmark results table +CREATE TABLE benchmark_results ( + id SERIAL PRIMARY KEY, + run_date TIMESTAMP DEFAULT NOW(), + git_commit TEXT, + benchmark_name TEXT, + metric_name TEXT, + value FLOAT, + unit TEXT, + metadata JSONB +); + +-- Query for trend analysis +SELECT + date_trunc('day', run_date) AS day, + benchmark_name, + AVG(value) AS avg_value, + MIN(value) AS min_value, + MAX(value) AS max_value +FROM benchmark_results +WHERE metric_name = 'search_latency_p50' + AND run_date > NOW() - INTERVAL '30 days' +GROUP BY 1, 2 +ORDER BY 1, 2; +``` + +## Reporting Format + +### Performance Report Template + +```markdown +# RuVector-Postgres Performance Report + +**Date:** 2024-XX-XX +**Version:** 0.X.0 +**Commit:** abc123 + +## Summary + +- Overall performance: **X% faster** than pgvector +- Memory efficiency: **X% less** than competitors +- Recall@10: **0.97** (target: 0.95) + +## Detailed Results + +### Index Build Performance +| Size | HNSW Time | IVFFlat Time | Memory | +|------|-----------|--------------|--------| +| 100K | Xs | Xs | XMB | +| 1M | Xm | Xm | XGB | + +###
Search Latency (1M vectors, 768d) +| Metric | HNSW | IVFFlat | Target | +|--------|------|---------|--------| +| p50 | Xms | Xms | <2ms | +| p99 | Xms | Xms | <10ms | +| QPS | X | X | >5000 | + +### Comparison with Competitors +[Charts and tables] + +## Recommendations + +1. For latency-sensitive workloads: Use HNSW with ef_search=64 +2. For memory-constrained: Use IVFFlat with SQ8 quantization +3. For maximum throughput: Enable parallel search with 4 workers +``` + +## Running Benchmarks + +```bash +# Run all micro-benchmarks +cargo bench --features bench + +# Run specific benchmark +cargo bench -- distance + +# Run PostgreSQL benchmarks +./scripts/run_pg_benchmarks.sh + +# Generate comparison report +python scripts/generate_report.py + +# Quick smoke test +cargo bench -- --quick +``` diff --git a/crates/ruvector-postgres/docs/integration-plans/README.md b/crates/ruvector-postgres/docs/integration-plans/README.md new file mode 100644 index 00000000..98b32c64 --- /dev/null +++ b/crates/ruvector-postgres/docs/integration-plans/README.md @@ -0,0 +1,165 @@ +# RuVector-Postgres Integration Plans + +Comprehensive implementation plans for integrating advanced capabilities into the ruvector-postgres PostgreSQL extension. + +## Overview + +These documents outline the roadmap to transform ruvector-postgres from a pgvector-compatible extension into a full-featured AI database with self-learning, attention mechanisms, GNN layers, and more. + +## Current State + +ruvector-postgres v0.1.0 includes: +- ✅ SIMD-optimized distance functions (AVX-512, AVX2, NEON) +- ✅ HNSW index with configurable parameters +- ✅ IVFFlat index for memory-efficient search +- ✅ Scalar (SQ8), Binary, and Product quantization +- ✅ pgvector-compatible SQL interface +- ✅ Parallel query execution + +## Planned Integrations + +| Feature | Document | Priority | Complexity | Est. 
Weeks | +|---------|----------|----------|------------|------------| +| Self-Learning / ReasoningBank | [01-self-learning.md](./01-self-learning.md) | High | High | 10 | +| Attention Mechanisms (39 types) | [02-attention-mechanisms.md](./02-attention-mechanisms.md) | High | Medium | 12 | +| GNN Layers | [03-gnn-layers.md](./03-gnn-layers.md) | High | High | 12 | +| Hyperbolic Embeddings | [04-hyperbolic-embeddings.md](./04-hyperbolic-embeddings.md) | Medium | Medium | 10 | +| Sparse Vectors | [05-sparse-vectors.md](./05-sparse-vectors.md) | High | Medium | 10 | +| Graph Operations & Cypher | [06-graph-operations.md](./06-graph-operations.md) | High | High | 14 | +| Tiny Dancer Routing | [07-tiny-dancer-routing.md](./07-tiny-dancer-routing.md) | Medium | Medium | 12 | + +## Supporting Documents + +| Document | Description | +|----------|-------------| +| [Optimization Strategy](./08-optimization-strategy.md) | SIMD, memory, query optimization techniques | +| [Benchmarking Plan](./09-benchmarking-plan.md) | Performance testing and comparison methodology | + +## Architecture Principles + +### Modularity +Each feature is implemented as a separate module with feature flags: + +```toml +[features] +# Core (always enabled) +default = ["pg16"] + +# Advanced features (opt-in) +learning = [] +attention = [] +gnn = [] +hyperbolic = [] +sparse = [] +graph = [] +routing = [] + +# Feature bundles +ai-complete = ["learning", "attention", "gnn", "routing"] +graph-complete = ["hyperbolic", "sparse", "graph"] +all = ["ai-complete", "graph-complete"] +``` + +### Dependency Strategy + +``` +ruvector-postgres +├── ruvector-core (shared types, SIMD) +├── ruvector-attention (optional) +├── ruvector-gnn (optional) +├── ruvector-graph (optional) +├── ruvector-tiny-dancer-core (optional) +└── External + ├── pgrx (PostgreSQL FFI) + ├── simsimd (SIMD operations) + └── rayon (parallelism) +``` + +### SQL Interface Design + +All features follow consistent SQL patterns: + +```sql +-- Enable 
features +SELECT ruvector_enable_feature('learning', table_name := 'embeddings'); + +-- Configuration via GUCs +SET ruvector.learning_rate = 0.01; +SET ruvector.attention_type = 'flash'; + +-- Feature-specific functions prefixed with ruvector_ +SELECT ruvector_attention_score(a, b, 'scaled_dot'); +SELECT ruvector_gnn_search(query, 'edges', num_hops := 2); +SELECT ruvector_route(request, optimize_for := 'cost'); + +-- Cypher queries via dedicated function +SELECT * FROM ruvector_cypher('graph_name', $$ + MATCH (n:Person)-[:KNOWS]->(friend) + RETURN friend.name +$$); +``` + +## Implementation Roadmap + +### Phase 1: Foundation (Months 1-3) +- [ ] Sparse vectors (BM25, SPLADE support) +- [ ] Hyperbolic embeddings (Poincaré ball model) +- [ ] Basic attention operations (scaled dot-product) + +### Phase 2: Graph (Months 4-6) +- [ ] Property graph storage +- [ ] Cypher query parser +- [ ] Basic graph algorithms (BFS, shortest path) +- [ ] Vector-guided traversal + +### Phase 3: Neural (Months 7-9) +- [ ] GNN message passing framework +- [ ] GCN, GraphSAGE, GAT layers +- [ ] Multi-head attention +- [ ] Flash attention + +### Phase 4: Intelligence (Months 10-12) +- [ ] Self-learning trajectory tracking +- [ ] ReasoningBank pattern storage +- [ ] Adaptive search optimization +- [ ] AI agent routing (Tiny Dancer) + +### Phase 5: Production (Months 13-15) +- [ ] Performance optimization +- [ ] Comprehensive benchmarking +- [ ] Documentation and examples +- [ ] Production hardening + +## Performance Targets + +| Metric | Target | Notes | +|--------|--------|-------| +| Vector search (1M, 768d) | <2ms p50 | HNSW with ef=64 | +| Recall@10 | >0.95 | At target latency | +| GNN forward (10K nodes) | <20ms | Single layer | +| Cypher simple query | <5ms | Pattern match | +| Memory overhead | <20% | vs raw vectors | +| Build throughput | >50K vec/s | HNSW M=16 | + +## Contributing + +Each integration plan includes: +1. Architecture diagrams +2. Module structure +3. 
SQL interface specification +4. Implementation phases with timelines +5. Code examples +6. Benchmark targets +7. Dependencies and feature flags + +When implementing: +1. Start with the module structure +2. Implement core functionality with tests +3. Add PostgreSQL integration +4. Write benchmarks +5. Document SQL interface +6. Update this README + +## License + +MIT License - See main repository for details.