From e860b24b89f84fb2012d8020966ff9861919f3bd Mon Sep 17 00:00:00 2001 From: rUv Date: Sun, 15 Feb 2026 06:15:00 +0000 Subject: [PATCH] fix: HNSW index bugs, agent/SPARQL crashes, lru security (#152, #164, #167, #171, #148) HNSW fixes: - Extract vector dimensions from column atttypmod instead of hardcoding 128, which caused corrupted indexes for non-128-dim embeddings (#171, #164) - Add page boundary checks in read_vector/read_neighbors to prevent segfaults on large tables with >100K rows (#164) - Use BinaryHeap::into_sorted_vec() for deterministic result ordering instead of into_iter() which yields arbitrary order (#171) - Handle non-kNN scans (COUNT, WHERE IS NOT NULL) gracefully by returning false from hnsw_gettuple when no ORDER BY operator is present (#152) Agent/SPARQL fixes: - Fix SQL type mismatch: ruvector_list_agents() and ruvector_find_agents_by_capability() now use RETURNS TABLE(...) matching the Rust TableIterator signatures instead of RETURNS SETOF jsonb (#167) - Add empty query validation to ruvector_sparql() and ruvector_sparql_json() to prevent panics on invalid input (#167) - Change workspace panic profile from "abort" to "unwind" so pgrx can convert Rust panics to PostgreSQL errors instead of killing the backend (#167) Security: - Bump lru dependency from 0.12 to 0.16 in ruvector-graph, ruvector-cli, and ruvLLM to resolve GHSA-xpfx-fvgv-hgqp Stacked Borrows violation (#148) Version bumps: workspace 2.0.3, ruvector-postgres 2.0.2 Co-Authored-By: claude-flow --- Cargo.toml | 4 +- crates/ruvector-cli/Cargo.toml | 2 +- crates/ruvector-graph/Cargo.toml | 2 +- crates/ruvector-postgres/Cargo.toml | 2 +- .../ruvector-postgres/sql/ruvector--2.0.0.sql | 4 +- .../ruvector-postgres/src/graph/operators.rs | 10 +++ crates/ruvector-postgres/src/index/hnsw_am.rs | 86 +++++++++++++++++-- examples/ruvLLM/Cargo.toml | 2 +- 8 files changed, 98 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8e860c17..1b433989 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -99,7 +99,7 @@ members = [ resolver = "2" [workspace.package] -version = "2.0.2" +version = "2.0.3" edition = "2021" rust-version = "1.77" license = "MIT" @@ -171,7 +171,7 @@ opt-level = 3 lto = "fat" codegen-units = 1 strip = true -panic = "abort" +panic = "unwind" [profile.bench] inherits = "release" diff --git a/crates/ruvector-cli/Cargo.toml b/crates/ruvector-cli/Cargo.toml index 85f9d2ed..fd17e442 100644 --- a/crates/ruvector-cli/Cargo.toml +++ b/crates/ruvector-cli/Cargo.toml @@ -31,7 +31,7 @@ tokio-postgres = { version = "0.7", optional = true } deadpool-postgres = { version = "0.14", optional = true } # LRU cache for performance optimization -lru = "0.12" +lru = "0.16" # Compression for storage flate2 = "1.0" diff --git a/crates/ruvector-graph/Cargo.toml b/crates/ruvector-graph/Cargo.toml index 92bb2432..3fa13e2c 100644 --- a/crates/ruvector-graph/Cargo.toml +++ b/crates/ruvector-graph/Cargo.toml @@ -69,7 +69,7 @@ pest_derive = { version = "2.7", optional = true } lalrpop-util = { version = "0.21", optional = true } # Cache -lru = "0.12" +lru = "0.16" moka = { version = "0.12", features = ["future"], optional = true } # Compression (for storage optimization, optional for WASM) diff --git a/crates/ruvector-postgres/Cargo.toml b/crates/ruvector-postgres/Cargo.toml index a49d8b84..7506f822 100644 --- a/crates/ruvector-postgres/Cargo.toml +++ b/crates/ruvector-postgres/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ruvector-postgres" -version = "2.0.1" +version = "2.0.2" edition = "2021" license = "MIT" description = "High-performance PostgreSQL vector database extension v2 - pgvector drop-in replacement with 230+ SQL functions, SIMD acceleration, Flash Attention, GNN layers, hybrid search, multi-tenancy, self-healing, and self-learning capabilities" diff --git a/crates/ruvector-postgres/sql/ruvector--2.0.0.sql b/crates/ruvector-postgres/sql/ruvector--2.0.0.sql index c62b692d..cb5e129e 100644 --- a/crates/ruvector-postgres/sql/ruvector--2.0.0.sql +++ b/crates/ruvector-postgres/sql/ruvector--2.0.0.sql @@ -525,7 +525,7 @@ LANGUAGE C VOLATILE PARALLEL SAFE; -- List all agents CREATE OR REPLACE FUNCTION ruvector_list_agents() -RETURNS SETOF jsonb +RETURNS TABLE(name text, agent_type text, capabilities text[], cost_per_request real, avg_latency_ms real, quality_score real, success_rate real, total_requests bigint, is_active boolean) AS 'MODULE_PATHNAME', 'ruvector_list_agents_wrapper' LANGUAGE C VOLATILE PARALLEL SAFE; @@ -537,7 +537,7 @@ LANGUAGE C VOLATILE PARALLEL SAFE; -- Find agents by capability CREATE OR REPLACE FUNCTION ruvector_find_agents_by_capability(capability text, max_results int DEFAULT 10) -RETURNS SETOF jsonb +RETURNS TABLE(name text, quality_score real, avg_latency_ms real, cost_per_request real) AS 'MODULE_PATHNAME', 'ruvector_find_agents_by_capability_wrapper' LANGUAGE C VOLATILE PARALLEL SAFE; diff --git a/crates/ruvector-postgres/src/graph/operators.rs b/crates/ruvector-postgres/src/graph/operators.rs index 17ab4d17..e09c5d42 100644 --- a/crates/ruvector-postgres/src/graph/operators.rs +++ b/crates/ruvector-postgres/src/graph/operators.rs @@ -324,6 +324,11 @@ fn ruvector_create_rdf_store(name: &str) -> bool { /// ``` #[pg_extern] fn ruvector_sparql(store_name: &str, query: &str, format: &str) -> Result { + // Validate input to prevent panics + if query.trim().is_empty() { + return Err("SPARQL query cannot be empty".to_string()); + } + let store = get_store(store_name) .ok_or_else(|| format!("Triple store '{}' does not exist", store_name))?; @@ -350,6 +355,11 @@ fn ruvector_sparql(store_name: &str, query: &str, format: &str) -> Result Result { + // Validate input to prevent panics that would abort PostgreSQL + if query.trim().is_empty() { + return Err("SPARQL query cannot be empty".to_string()); + } + let result = ruvector_sparql(store_name, query, "json")?; let json_value: JsonValue = diff --git a/crates/ruvector-postgres/src/index/hnsw_am.rs b/crates/ruvector-postgres/src/index/hnsw_am.rs index 617110f8..af9013e7 100644 --- a/crates/ruvector-postgres/src/index/hnsw_am.rs +++ b/crates/ruvector-postgres/src/index/hnsw_am.rs @@ -505,6 +505,21 @@ unsafe fn read_vector( let header = page as *const PageHeaderData; let data_ptr = (header as *const u8).add(size_of::()); + + // Bounds check: prevent reading past page boundary. Fixes #164 segfault. + let page_size = pg_sys::BLCKSZ as usize; + let total_read_end = size_of::() + + size_of::() + + dimensions * size_of::(); + if total_read_end > page_size { + pgrx::warning!( + "HNSW: Vector read would exceed page boundary ({} > {}), skipping block {}", + total_read_end, page_size, block + ); + pg_sys::UnlockReleaseBuffer(buffer); + return None; + } + let vector_ptr = data_ptr.add(size_of::()) as *const f32; let mut vector = Vec::with_capacity(dimensions); @@ -550,6 +565,23 @@ unsafe fn read_neighbors( offset += count * size_of::(); } + // Bounds check: prevent reading past page boundary. Fixes #164 segfault. + let page_size = pg_sys::BLCKSZ as usize; + let header_size = size_of::(); + let total_read_end = header_size + + size_of::() + + vector_size + + offset + + neighbor_count * size_of::(); + if total_read_end > page_size { + pgrx::warning!( + "HNSW: Neighbor read would exceed page boundary ({} > {}), skipping block {}", + total_read_end, page_size, block + ); + pg_sys::UnlockReleaseBuffer(buffer); + return Vec::new(); + } + let neighbors_ptr = neighbors_base.add(offset) as *const HnswNeighbor; let mut neighbors = Vec::with_capacity(neighbor_count); for i in 0..neighbor_count { @@ -712,16 +744,16 @@ unsafe fn hnsw_search( } } - // Convert to sorted result vector + // Convert to sorted result vector. + // Use into_sorted_vec() for deterministic ordering instead of into_iter() + // which yields arbitrary order from BinaryHeap. Fixes #171. let mut result_vec: Vec<_> = results + .into_sorted_vec() .into_iter() .take(k) .map(|r| (r.block, r.tid, r.distance)) .collect(); - result_vec.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(Ordering::Equal)); - result_vec.truncate(k); - result_vec } @@ -738,8 +770,32 @@ unsafe extern "C" fn hnsw_build( ) -> *mut IndexBuildResult { pgrx::log!("HNSW v2: Starting index build"); - // Get dimensions from first tuple or index definition - let dimensions = 128; // TODO: Extract from index column definition + // Extract dimensions from the indexed column's type modifier (atttypmod). + // For ruvector(384), atttypmod == 384. Fixes #171 and #164. + let dimensions = { + let tupdesc = (*heap).rd_att; + let natts = (*index_info).ii_NumIndexAttrs as isize; + let mut dims: u32 = 0; + if natts > 0 && !tupdesc.is_null() { + let attnum = *(*index_info).ii_IndexAttrNumbers.offset(0); + if attnum > 0 && (attnum as isize) <= (*tupdesc).natts as isize { + let attr = (*tupdesc).attrs.as_ptr().offset((attnum - 1) as isize); + let typmod = (*attr).atttypmod; + if typmod > 0 { + dims = typmod as u32; + } + } + } + if dims == 0 { + pgrx::warning!( + "HNSW: Could not determine vector dimensions from column type modifier, \ + defaulting to 384. Ensure column is defined as ruvector(N)." + ); + dims = 384; + } + pgrx::log!("HNSW v2: Building index with {} dimensions", dims); + dims as usize + }; let config = HnswConfig::default(); // Parse options from WITH clause @@ -1399,6 +1455,14 @@ unsafe extern "C" fn hnsw_rescan( state.search_done = false; state.query_valid = false; // Reset validity flag + // Non-kNN scan (e.g., COUNT(*), WHERE embedding IS NOT NULL) + // When there are no ORDER BY operators, we cannot perform a vector search. + // Return early and let hnsw_gettuple return false, forcing PostgreSQL to + // fall back to a sequential scan. Fixes #152. + if norderbys <= 0 || orderbys.is_null() { + return; + } + // Extract query vector from ORDER BY if norderbys > 0 && !orderbys.is_null() { let orderby = &*orderbys; @@ -1483,6 +1547,9 @@ unsafe extern "C" fn hnsw_rescan( } // Validate query vector - CRITICAL: Prevent crashes from invalid queries + // Note: if query_valid is false due to norderbys==0 (non-kNN scan), + // we already returned early above. This check only fires for kNN scans + // where vector extraction genuinely failed. if !state.query_valid || state.query_vector.is_empty() { // Instead of using zeros which crash, raise a proper error pgrx::error!( @@ -1577,6 +1644,13 @@ unsafe extern "C" fn hnsw_gettuple(scan: IndexScanDesc, direction: ScanDirection let state = &mut *((*scan).opaque as *mut HnswScanState); let index = (*scan).indexRelation; + // Non-kNN scan: no query vector was provided (e.g., COUNT(*), WHERE IS NOT NULL). + // Return false to tell PostgreSQL this index cannot satisfy this scan type, + // forcing fallback to sequential scan. Fixes #152. + if !state.query_valid && !state.search_done { + return false; + } + // Execute search on first call if !state.search_done { let (meta_page, meta_buffer) = get_meta_page(index); diff --git a/examples/ruvLLM/Cargo.toml b/examples/ruvLLM/Cargo.toml index e8ea366b..22332410 100644 --- a/examples/ruvLLM/Cargo.toml +++ b/examples/ruvLLM/Cargo.toml @@ -64,7 +64,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } # Performance dashmap = "6.1" parking_lot = "0.12" -lru = "0.12" +lru = "0.16" rayon = "1.10" crossbeam = "0.8" once_cell = "1.20"