diff --git a/npm/packages/postgres-cli/benchmarks/run_benchmarks_optimized.sql b/npm/packages/postgres-cli/benchmarks/run_benchmarks_optimized.sql
new file mode 100644
index 00000000..c3def7a7
--- /dev/null
+++ b/npm/packages/postgres-cli/benchmarks/run_benchmarks_optimized.sql
@@ -0,0 +1,171 @@
+-- RuVector Optimized Benchmark Runner
+-- Times the optimized operations; run ruvector_benchmark_optimized.sql first (it creates the benchmark_* tables and bench_* helpers)
+
+\timing on
+
+-- ============================================================================
+-- Test 1: HNSW Vector Search (Target: ~24ms for 1000 vectors)
+-- ============================================================================
+\echo '=== Test 1: HNSW Vector Search ==='
+
+-- Warm up. The uncorrelated scalar subquery draws one query vector, so every row is compared with the same vector.
+SELECT id, embedding <=> (SELECT ruvector_random(128)) AS distance
+FROM benchmark_vectors
+ORDER BY distance
+LIMIT 10;
+
+-- Benchmark: find 10 nearest neighbors. <=> is assumed to be the operator behind ruvector_cosine_ops (TODO confirm: plan should use idx_vectors_hnsw)
+EXPLAIN ANALYZE
+SELECT id, embedding <=> (SELECT ruvector_random(128)) AS distance
+FROM benchmark_vectors
+ORDER BY distance
+LIMIT 10;
+
+-- ============================================================================
+-- Test 2: Hamming Distance with bit_count (Target: ~7.6ms)
+-- ============================================================================
+\echo '=== Test 2: Hamming Distance ==='
+
+EXPLAIN ANALYZE
+SELECT
+    a.id AS id_a,
+    b.id AS id_b,
+    bench_hamming_distance(a.binary_quantized, b.binary_quantized) AS hamming_dist
+FROM benchmark_quantized a
+CROSS JOIN benchmark_quantized b
+WHERE a.id < b.id
+LIMIT 1000;
+
+-- ============================================================================
+-- Test 3: Full-Text Search with GIN (Target: ~3.5ms)
+-- ============================================================================
+\echo '=== Test 3: Full-Text Search ==='
+
+EXPLAIN ANALYZE
+SELECT id, content, ts_rank(content_tsvector, query) AS rank
+FROM benchmark_documents, plainto_tsquery('english', 'vector database search') query
+WHERE content_tsvector @@ query
+ORDER BY rank DESC
+LIMIT 20;
+
+-- ============================================================================
+-- Test 4: GraphSAGE Aggregation (Target: ~2.6ms)
+-- ============================================================================
+\echo '=== Test 4: GraphSAGE Neighbor Aggregation ==='
+
+EXPLAIN ANALYZE
+WITH neighbor_features AS (
+    SELECT
+        e.source_id,
+        ruvector_mean(ARRAY_AGG(n.features)) AS mean_neighbor
+    FROM benchmark_edges e
+    JOIN benchmark_nodes n ON e.target_id = n.id
+    GROUP BY e.source_id
+)
+SELECT
+    s.id,
+    ruvector_concat(s.features, COALESCE(nf.mean_neighbor, s.features)) AS aggregated
+FROM benchmark_nodes s
+LEFT JOIN neighbor_features nf ON s.id = nf.source_id
+LIMIT 50;
+
+-- ============================================================================
+-- Test 5: Sparse Vector Dot Product (Target: ~27ms)
+-- ============================================================================
+\echo '=== Test 5: Sparse Dot Product ==='
+
+EXPLAIN ANALYZE
+SELECT
+    a.id AS id_a,
+    b.id AS id_b,
+    bench_sparse_dot(a.sparse_embedding, b.sparse_embedding) AS similarity
+FROM benchmark_documents a
+CROSS JOIN benchmark_documents b
+WHERE a.id < b.id
+LIMIT 500;
+
+-- ============================================================================
+-- Test 6: Graph Edge Lookup (Target: ~5ms)
+-- ============================================================================
+\echo '=== Test 6: Graph Edge Lookup ==='
+
+EXPLAIN ANALYZE
+SELECT
+    e.*,
+    s.features AS source_features,
+    t.features AS target_features
+FROM benchmark_edges e
+JOIN benchmark_nodes s ON e.source_id = s.id
+JOIN benchmark_nodes t ON e.target_id = t.id
+WHERE e.source_id IN (SELECT id FROM benchmark_nodes ORDER BY random() LIMIT 10);
+
+-- ============================================================================
+-- Test 7: Scalar Quantization Compression (Target: ~75ms)
+-- ============================================================================
+\echo '=== Test 7: Scalar Quantization ==='
+
+EXPLAIN ANALYZE
+SELECT
+    id,
+    octet_length(scalar_quantized) AS compressed_size,
+    ruvector_dim(original) * 4 AS original_size,
+    ROUND(100.0 * octet_length(scalar_quantized) / (ruvector_dim(original) * 4), 2) AS compression_ratio
+FROM benchmark_quantized
+LIMIT 100;
+
+-- ============================================================================
+-- Test 8: Binary Quantization + Hamming (Target: ~85ms)
+-- ============================================================================
+\echo '=== Test 8: Binary Quantization Search ==='
+
+EXPLAIN ANALYZE
+WITH query_binary AS (
+    SELECT ruvector_binary_quantize(ruvector_random(128)) AS q
+)
+SELECT
+    bq.id,
+    bench_hamming_distance(bq.binary_quantized, query_binary.q) AS hamming_dist
+FROM benchmark_quantized bq CROSS JOIN query_binary
+ORDER BY hamming_dist
+LIMIT 20;
+
+-- ============================================================================
+-- Summary
+-- ============================================================================
+\echo '=== Benchmark Summary ==='
+SELECT
+    'benchmark_vectors' AS table_name,
+    COUNT(*) AS row_count,
+    pg_size_pretty(pg_relation_size('benchmark_vectors')) AS table_size,
+    pg_size_pretty(pg_indexes_size('benchmark_vectors')) AS index_size
+FROM benchmark_vectors
+UNION ALL
+SELECT
+    'benchmark_documents',
+    COUNT(*),
+    pg_size_pretty(pg_relation_size('benchmark_documents')),
+    pg_size_pretty(pg_indexes_size('benchmark_documents'))
+FROM benchmark_documents
+UNION ALL
+SELECT
+    'benchmark_nodes',
+    COUNT(*),
+    pg_size_pretty(pg_relation_size('benchmark_nodes')),
+    pg_size_pretty(pg_indexes_size('benchmark_nodes'))
+FROM benchmark_nodes
+UNION ALL
+SELECT
+    'benchmark_edges',
+    COUNT(*),
+    pg_size_pretty(pg_relation_size('benchmark_edges')),
+    pg_size_pretty(pg_indexes_size('benchmark_edges'))
+FROM benchmark_edges
+UNION ALL
+SELECT
+    'benchmark_quantized',
+    COUNT(*),
+    pg_size_pretty(pg_relation_size('benchmark_quantized')),
+    pg_size_pretty(pg_indexes_size('benchmark_quantized'))
+FROM benchmark_quantized;
+
+\timing off
diff --git a/npm/packages/postgres-cli/benchmarks/ruvector_benchmark_optimized.sql b/npm/packages/postgres-cli/benchmarks/ruvector_benchmark_optimized.sql
new file mode 100644
index 00000000..f7532f6e
--- /dev/null
+++ b/npm/packages/postgres-cli/benchmarks/ruvector_benchmark_optimized.sql
@@ -0,0 +1,145 @@
+-- RuVector Optimized Benchmark Setup
+-- Performance-optimized schema with indexes and parallel-safe functions
+
+-- Enable extension
+CREATE EXTENSION IF NOT EXISTS ruvector;
+
+-- ============================================================================
+-- Optimized Vector Table with HNSW Index
+-- ============================================================================
+DROP TABLE IF EXISTS benchmark_vectors CASCADE;
+CREATE TABLE benchmark_vectors (
+    id SERIAL PRIMARY KEY,
+    embedding ruvector,
+    category TEXT,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Insert test vectors (1000 random 128-dim vectors)
+INSERT INTO benchmark_vectors (embedding, category)
+SELECT
+    ruvector_random(128),
+    'category_' || (random() * 10)::int
+FROM generate_series(1, 1000);
+
+-- Create HNSW index for fast similarity search (cosine operator class; only ORDER BY with that class's operator can use it)
+-- m=16: connections per layer, ef_construction=100: build-time accuracy
+CREATE INDEX IF NOT EXISTS idx_vectors_hnsw
+ON benchmark_vectors USING hnsw (embedding ruvector_cosine_ops)
+WITH (m = 16, ef_construction = 100);
+
+-- ============================================================================
+-- Optimized Full-Text Search with GIN Index
+-- ============================================================================
+DROP TABLE IF EXISTS benchmark_documents CASCADE;
+CREATE TABLE benchmark_documents (
+    id SERIAL PRIMARY KEY,
+    content TEXT,
+    content_tsvector TSVECTOR GENERATED ALWAYS AS (to_tsvector('english', content)) STORED,
+    sparse_embedding TEXT,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Insert test documents
+INSERT INTO benchmark_documents (content, sparse_embedding)
+SELECT
+    'Document ' || i || ' contains words like vector database similarity search embedding neural network',
+    ruvector_sparse_from_dense(ARRAY[random(), 0, random(), 0, random(), 0, random(), 0]::float4[])
+FROM generate_series(1, 500) i;
+
+-- GIN index for full-text search
+CREATE INDEX IF NOT EXISTS idx_documents_fts
+ON benchmark_documents USING gin (content_tsvector);
+
+-- ============================================================================
+-- Optimized Graph Tables with B-tree Indexes
+-- ============================================================================
+DROP TABLE IF EXISTS benchmark_edges CASCADE;
+DROP TABLE IF EXISTS benchmark_nodes CASCADE;
+
+CREATE TABLE benchmark_nodes (
+    id SERIAL PRIMARY KEY,
+    features ruvector,
+    node_type TEXT
+);
+
+CREATE TABLE benchmark_edges (
+    id SERIAL PRIMARY KEY,
+    source_id INT REFERENCES benchmark_nodes(id),
+    target_id INT REFERENCES benchmark_nodes(id),
+    edge_type TEXT,
+    weight FLOAT DEFAULT 1.0
+);
+
+-- Insert test graph data
+INSERT INTO benchmark_nodes (features, node_type)
+SELECT
+    ruvector_random(64),
+    'type_' || (random() * 5)::int
+FROM generate_series(1, 200);
+
+INSERT INTO benchmark_edges (source_id, target_id, edge_type, weight)
+SELECT
+    (random() * 199 + 1)::int,
+    (random() * 199 + 1)::int,
+    'edge_' || (random() * 3)::int,
+    random()
+FROM generate_series(1, 1000);
+
+-- B-tree indexes for fast edge lookups
+CREATE INDEX IF NOT EXISTS idx_edges_source ON benchmark_edges(source_id);
+CREATE INDEX IF NOT EXISTS idx_edges_target ON benchmark_edges(target_id);
+CREATE INDEX IF NOT EXISTS idx_edges_source_target ON benchmark_edges(source_id, target_id);
+
+-- ============================================================================
+-- Optimized Quantization Tables
+-- ============================================================================
+DROP TABLE IF EXISTS benchmark_quantized CASCADE;
+CREATE TABLE benchmark_quantized (
+    id SERIAL PRIMARY KEY,
+    original ruvector,
+    binary_quantized BIT VARYING,
+    scalar_quantized BYTEA
+);
+
+-- Insert and quantize vectors
+INSERT INTO benchmark_quantized (original, binary_quantized, scalar_quantized)
+SELECT
+    v.embedding,
+    ruvector_binary_quantize(v.embedding),
+    ruvector_scalar_quantize(v.embedding, 8)
+FROM benchmark_vectors v
+LIMIT 500;
+
+-- ============================================================================
+-- Parallel-Safe Helper Functions
+-- ============================================================================
+
+-- Parallel-safe cosine distance function
+CREATE OR REPLACE FUNCTION bench_cosine_distance(a ruvector, b ruvector)
+RETURNS float8 AS $$
+    SELECT ruvector_distance(a, b, 'cosine')
+$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
+
+-- Parallel-safe Hamming distance: XOR the bit strings (#), then count set bits (bit_count on bit strings needs PostgreSQL 14+)
+CREATE OR REPLACE FUNCTION bench_hamming_distance(a BIT VARYING, b BIT VARYING)
+RETURNS int AS $$
+    SELECT bit_count(a # b)::int
+$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
+
+-- Parallel-safe sparse comparison. NOTE(review): named *_dot but passes 'cosine' to ruvector_sparse_distance - confirm intent
+CREATE OR REPLACE FUNCTION bench_sparse_dot(a TEXT, b TEXT)
+RETURNS float8 AS $$
+    SELECT ruvector_sparse_distance(a, b, 'cosine')
+$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
+
+-- ============================================================================
+-- Statistics Update
+-- ============================================================================
+ANALYZE benchmark_vectors;
+ANALYZE benchmark_documents;
+ANALYZE benchmark_nodes;
+ANALYZE benchmark_edges;
+ANALYZE benchmark_quantized;
+
+SELECT 'Optimized benchmark setup complete' AS status;
diff --git a/npm/packages/postgres-cli/package.json b/npm/packages/postgres-cli/package.json
index 6be055d0..29da5a5d 100644
--- a/npm/packages/postgres-cli/package.json
+++ b/npm/packages/postgres-cli/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@ruvector/postgres-cli",
-  "version": "0.2.1",
+  "version": "0.2.2",
   "description": "Advanced AI vector database CLI for PostgreSQL - pgvector drop-in replacement with 53+ SQL functions, 39 attention mechanisms, GNN layers, hyperbolic embeddings, and self-learning capabilities",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -86,6 +86,7 @@
   },
   "files": [
     "dist",
+    "benchmarks",
     "README.md"
   ],
   "publishConfig": {
diff --git a/npm/packages/postgres-cli/src/cli.ts b/npm/packages/postgres-cli/src/cli.ts
index f32450ca..8e15f598 100644
--- a/npm/packages/postgres-cli/src/cli.ts
+++ b/npm/packages/postgres-cli/src/cli.ts
@@ -17,6 +17,7 @@
 
 import { Command } from 'commander';
 import chalk from 'chalk';
+import { createRequire } from 'module';
 import { RuVectorClient } from './client.js';
 import { VectorCommands } from './commands/vector.js';
 import { AttentionCommands } from './commands/attention.js';
@@ -30,12 +31,16 @@
 import { RoutingCommands } from './commands/routing.js';
 import { QuantizationCommands } from './commands/quantization.js';
 import { InstallCommands } from './commands/install.js';
+// Read version from package.json
+const require = createRequire(import.meta.url);
+const pkg = require('../package.json');
+
 const program = new Command();
 
 program
   .name('ruvector-pg')
   .description('RuVector PostgreSQL CLI - Advanced AI Vector Database Extension')
-  .version('0.2.0')
+  .version(pkg.version)
   .option('-c, --connection ', 'PostgreSQL connection string', 'postgresql://localhost:5432/ruvector')
   .option('-v, --verbose', 'Enable verbose output');
 