fix: HNSW index bugs, agent/SPARQL crashes, lru security (#152, #164, #167, #171, #148)

HNSW fixes:
- Extract vector dimensions from column atttypmod instead of hardcoding 128,
  which caused corrupted indexes for non-128-dim embeddings; fall back to 384
  with a warning when the column has no type modifier (#171, #164)
- Add page boundary checks in read_vector/read_neighbors to prevent
  segfaults on large tables with >100K rows (#164)
- Use BinaryHeap::into_sorted_vec() for deterministic result ordering
  instead of into_iter() which yields arbitrary order (#171)
- Handle non-kNN scans (COUNT, WHERE IS NOT NULL) gracefully by returning
  false from hnsw_gettuple when no ORDER BY operator is present (#152)

Agent/SPARQL fixes:
- Fix SQL type mismatch: ruvector_list_agents() and
  ruvector_find_agents_by_capability() now use RETURNS TABLE(...)
  matching the Rust TableIterator signatures instead of RETURNS SETOF jsonb (#167)
- Add empty query validation to ruvector_sparql() and
  ruvector_sparql_json() to prevent panics on invalid input (#167)
- Change workspace panic profile from "abort" to "unwind" so pgrx can
  convert Rust panics to PostgreSQL errors instead of killing the backend (#167)

Security:
- Bump lru dependency from 0.12 to 0.16 in ruvector-graph, ruvector-cli,
  and ruvLLM to resolve GHSA-xpfx-fvgv-hgqp Stacked Borrows violation (#148)

Version bumps: workspace 2.0.3, ruvector-postgres 2.0.2

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
rUv 2026-02-15 06:15:00 +00:00
parent 2984452426
commit e860b24b89
8 changed files with 98 additions and 14 deletions

View file

@ -99,7 +99,7 @@ members = [
resolver = "2"
[workspace.package]
version = "2.0.2"
version = "2.0.3"
edition = "2021"
rust-version = "1.77"
license = "MIT"
@ -171,7 +171,7 @@ opt-level = 3
lto = "fat"
codegen-units = 1
strip = true
panic = "abort"
panic = "unwind"
[profile.bench]
inherits = "release"

View file

@ -31,7 +31,7 @@ tokio-postgres = { version = "0.7", optional = true }
deadpool-postgres = { version = "0.14", optional = true }
# LRU cache for performance optimization
lru = "0.12"
lru = "0.16"
# Compression for storage
flate2 = "1.0"

View file

@ -69,7 +69,7 @@ pest_derive = { version = "2.7", optional = true }
lalrpop-util = { version = "0.21", optional = true }
# Cache
lru = "0.12"
lru = "0.16"
moka = { version = "0.12", features = ["future"], optional = true }
# Compression (for storage optimization, optional for WASM)

View file

@ -1,6 +1,6 @@
[package]
name = "ruvector-postgres"
version = "2.0.1"
version = "2.0.2"
edition = "2021"
license = "MIT"
description = "High-performance PostgreSQL vector database extension v2 - pgvector drop-in replacement with 230+ SQL functions, SIMD acceleration, Flash Attention, GNN layers, hybrid search, multi-tenancy, self-healing, and self-learning capabilities"

View file

@ -525,7 +525,7 @@ LANGUAGE C VOLATILE PARALLEL SAFE;
-- List all agents
CREATE OR REPLACE FUNCTION ruvector_list_agents()
RETURNS SETOF jsonb
RETURNS TABLE(name text, agent_type text, capabilities text[], cost_per_request real, avg_latency_ms real, quality_score real, success_rate real, total_requests bigint, is_active boolean)
AS 'MODULE_PATHNAME', 'ruvector_list_agents_wrapper'
LANGUAGE C VOLATILE PARALLEL SAFE;
@ -537,7 +537,7 @@ LANGUAGE C VOLATILE PARALLEL SAFE;
-- Find agents by capability
CREATE OR REPLACE FUNCTION ruvector_find_agents_by_capability(capability text, max_results int DEFAULT 10)
RETURNS SETOF jsonb
RETURNS TABLE(name text, quality_score real, avg_latency_ms real, cost_per_request real)
AS 'MODULE_PATHNAME', 'ruvector_find_agents_by_capability_wrapper'
LANGUAGE C VOLATILE PARALLEL SAFE;

View file

@ -324,6 +324,11 @@ fn ruvector_create_rdf_store(name: &str) -> bool {
/// ```
#[pg_extern]
fn ruvector_sparql(store_name: &str, query: &str, format: &str) -> Result<String, String> {
// Validate input to prevent panics
if query.trim().is_empty() {
return Err("SPARQL query cannot be empty".to_string());
}
let store = get_store(store_name)
.ok_or_else(|| format!("Triple store '{}' does not exist", store_name))?;
@ -350,6 +355,11 @@ fn ruvector_sparql(store_name: &str, query: &str, format: &str) -> Result<String
/// ```
#[pg_extern]
fn ruvector_sparql_json(store_name: &str, query: &str) -> Result<JsonB, String> {
// Validate input to prevent panics that would abort PostgreSQL
if query.trim().is_empty() {
return Err("SPARQL query cannot be empty".to_string());
}
let result = ruvector_sparql(store_name, query, "json")?;
let json_value: JsonValue =

View file

@ -505,6 +505,21 @@ unsafe fn read_vector(
let header = page as *const PageHeaderData;
let data_ptr = (header as *const u8).add(size_of::<PageHeaderData>());
// Bounds check: prevent reading past page boundary. Fixes #164 segfault.
let page_size = pg_sys::BLCKSZ as usize;
let total_read_end = size_of::<PageHeaderData>()
+ size_of::<HnswNodePageHeader>()
+ dimensions * size_of::<f32>();
if total_read_end > page_size {
pgrx::warning!(
"HNSW: Vector read would exceed page boundary ({} > {}), skipping block {}",
total_read_end, page_size, block
);
pg_sys::UnlockReleaseBuffer(buffer);
return None;
}
let vector_ptr = data_ptr.add(size_of::<HnswNodePageHeader>()) as *const f32;
let mut vector = Vec::with_capacity(dimensions);
@ -550,6 +565,23 @@ unsafe fn read_neighbors(
offset += count * size_of::<HnswNeighbor>();
}
// Bounds check: prevent reading past page boundary. Fixes #164 segfault.
let page_size = pg_sys::BLCKSZ as usize;
let header_size = size_of::<PageHeaderData>();
let total_read_end = header_size
+ size_of::<HnswNodePageHeader>()
+ vector_size
+ offset
+ neighbor_count * size_of::<HnswNeighbor>();
if total_read_end > page_size {
pgrx::warning!(
"HNSW: Neighbor read would exceed page boundary ({} > {}), skipping block {}",
total_read_end, page_size, block
);
pg_sys::UnlockReleaseBuffer(buffer);
return Vec::new();
}
let neighbors_ptr = neighbors_base.add(offset) as *const HnswNeighbor;
let mut neighbors = Vec::with_capacity(neighbor_count);
for i in 0..neighbor_count {
@ -712,16 +744,16 @@ unsafe fn hnsw_search(
}
}
// Convert to sorted result vector
// Convert to sorted result vector.
// Use into_sorted_vec() for deterministic ordering instead of into_iter()
// which yields arbitrary order from BinaryHeap. Fixes #171.
let mut result_vec: Vec<_> = results
.into_sorted_vec()
.into_iter()
.take(k)
.map(|r| (r.block, r.tid, r.distance))
.collect();
result_vec.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(Ordering::Equal));
result_vec.truncate(k);
result_vec
}
@ -738,8 +770,32 @@ unsafe extern "C" fn hnsw_build(
) -> *mut IndexBuildResult {
pgrx::log!("HNSW v2: Starting index build");
// Get dimensions from first tuple or index definition
let dimensions = 128; // TODO: Extract from index column definition
// Extract dimensions from the indexed column's type modifier (atttypmod).
// For ruvector(384), atttypmod == 384. Fixes #171 and #164.
let dimensions = {
let tupdesc = (*heap).rd_att;
let natts = (*index_info).ii_NumIndexAttrs as isize;
let mut dims: u32 = 0;
if natts > 0 && !tupdesc.is_null() {
let attnum = *(*index_info).ii_IndexAttrNumbers.offset(0);
if attnum > 0 && (attnum as isize) <= (*tupdesc).natts as isize {
let attr = (*tupdesc).attrs.as_ptr().offset((attnum - 1) as isize);
let typmod = (*attr).atttypmod;
if typmod > 0 {
dims = typmod as u32;
}
}
}
if dims == 0 {
pgrx::warning!(
"HNSW: Could not determine vector dimensions from column type modifier, \
defaulting to 384. Ensure column is defined as ruvector(N)."
);
dims = 384;
}
pgrx::log!("HNSW v2: Building index with {} dimensions", dims);
dims as usize
};
let config = HnswConfig::default();
// Parse options from WITH clause
@ -1399,6 +1455,14 @@ unsafe extern "C" fn hnsw_rescan(
state.search_done = false;
state.query_valid = false; // Reset validity flag
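// Non-kNN scan (e.g., COUNT(*), WHERE embedding IS NOT NULL).
// Without ORDER BY operators there is no query vector, so no vector search
// can be performed. Return early with query_valid still false; hnsw_gettuple
// then returns false instead of raising an error. Fixes #152.
// NOTE(review): returning false from amgettuple reports "no more tuples" to
// the executor; it does not by itself make PostgreSQL fall back to a
// sequential scan - confirm the planner never picks this index for such scans.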
if norderbys <= 0 || orderbys.is_null() {
return;
}
// Extract query vector from ORDER BY
if norderbys > 0 && !orderbys.is_null() {
let orderby = &*orderbys;
@ -1483,6 +1547,9 @@ unsafe extern "C" fn hnsw_rescan(
}
// Validate query vector - CRITICAL: Prevent crashes from invalid queries
// Note: if query_valid is false due to norderbys==0 (non-kNN scan),
// we already returned early above. This check only fires for kNN scans
// where vector extraction genuinely failed.
if !state.query_valid || state.query_vector.is_empty() {
// Instead of using zeros which crash, raise a proper error
pgrx::error!(
@ -1577,6 +1644,13 @@ unsafe extern "C" fn hnsw_gettuple(scan: IndexScanDesc, direction: ScanDirection
let state = &mut *((*scan).opaque as *mut HnswScanState);
let index = (*scan).indexRelation;
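// Non-kNN scan: no query vector was provided (e.g., COUNT(*), WHERE IS NOT NULL).
// Return false so the scan ends without attempting a vector search. Fixes #152.
// NOTE(review): a false return from amgettuple means "no more tuples", so a
// plan that uses this index for such a scan yields no rows rather than falling
// back to a sequential scan - confirm the cost estimator keeps the planner
// away from this index when there is no ORDER BY.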
if !state.query_valid && !state.search_done {
return false;
}
// Execute search on first call
if !state.search_done {
let (meta_page, meta_buffer) = get_meta_page(index);

View file

@ -64,7 +64,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# Performance
dashmap = "6.1"
parking_lot = "0.12"
lru = "0.12"
lru = "0.16"
rayon = "1.10"
crossbeam = "0.8"
once_cell = "1.20"