ruvector/crates/ruvector-postgres/tests/hnsw_index_tests.sql

322 lines
9.8 KiB
SQL

-- ============================================================================
-- HNSW Index Test Suite
-- ============================================================================
-- Comprehensive tests for HNSW index access method
--
-- Run with: psql -d testdb -f hnsw_index_tests.sql
\set ECHO all
\set ON_ERROR_STOP on
-- Create test database if needed
-- CREATE DATABASE hnsw_test;
-- \c hnsw_test
-- Load extension
CREATE EXTENSION IF NOT EXISTS ruvector;
-- ============================================================================
-- Test 1: Basic Index Creation
-- ============================================================================
\echo '=== Test 1: Basic HNSW Index Creation ==='
CREATE TABLE test_vectors (
id SERIAL PRIMARY KEY,
embedding real[]
);
-- Insert test data (3D vectors)
INSERT INTO test_vectors (embedding) VALUES
(ARRAY[0.0, 0.0, 0.0]::real[]),
(ARRAY[1.0, 0.0, 0.0]::real[]),
(ARRAY[0.0, 1.0, 0.0]::real[]),
(ARRAY[0.0, 0.0, 1.0]::real[]),
(ARRAY[1.0, 1.0, 0.0]::real[]),
(ARRAY[1.0, 0.0, 1.0]::real[]),
(ARRAY[0.0, 1.0, 1.0]::real[]),
(ARRAY[1.0, 1.0, 1.0]::real[]),
(ARRAY[0.5, 0.5, 0.5]::real[]),
(ARRAY[0.2, 0.3, 0.1]::real[]);
-- Create HNSW index with default options (L2 distance)
CREATE INDEX test_vectors_hnsw_l2_idx ON test_vectors USING hnsw (embedding hnsw_l2_ops);
-- Verify index was created
SELECT indexname, indexdef
FROM pg_indexes
WHERE tablename = 'test_vectors';
-- ============================================================================
-- Test 2: L2 Distance Queries
-- ============================================================================
\echo '=== Test 2: L2 Distance Queries ==='
-- Query nearest neighbors to origin [0, 0, 0]
SELECT id, embedding, embedding <-> ARRAY[0.0, 0.0, 0.0]::real[] AS distance
FROM test_vectors
ORDER BY embedding <-> ARRAY[0.0, 0.0, 0.0]::real[]
LIMIT 5;
-- Query nearest neighbors to [1, 1, 1]
SELECT id, embedding, embedding <-> ARRAY[1.0, 1.0, 1.0]::real[] AS distance
FROM test_vectors
ORDER BY embedding <-> ARRAY[1.0, 1.0, 1.0]::real[]
LIMIT 5;
-- ============================================================================
-- Test 3: Index with Custom Options
-- ============================================================================
\echo '=== Test 3: HNSW Index with Custom Options ==='
CREATE TABLE test_vectors_opts (
id SERIAL PRIMARY KEY,
embedding real[]
);
-- Insert larger dataset
INSERT INTO test_vectors_opts (embedding)
SELECT ARRAY[random(), random(), random()]::real[]
FROM generate_series(1, 1000);
-- Create index with custom parameters
CREATE INDEX test_vectors_opts_hnsw_idx ON test_vectors_opts
USING hnsw (embedding hnsw_l2_ops)
WITH (m = 32, ef_construction = 128);
-- Verify index was created with options
SELECT indexname, indexdef
FROM pg_indexes
WHERE tablename = 'test_vectors_opts';
-- Query performance test
\timing on
SELECT id, embedding <-> ARRAY[0.5, 0.5, 0.5]::real[] AS distance
FROM test_vectors_opts
ORDER BY embedding <-> ARRAY[0.5, 0.5, 0.5]::real[]
LIMIT 10;
\timing off
-- ============================================================================
-- Test 4: Cosine Distance Index
-- ============================================================================
\echo '=== Test 4: Cosine Distance Index ==='
CREATE TABLE test_vectors_cosine (
id SERIAL PRIMARY KEY,
embedding real[]
);
-- Insert normalized vectors for cosine similarity
INSERT INTO test_vectors_cosine (embedding)
SELECT vector_normalize(ARRAY[random(), random(), random()]::real[])
FROM generate_series(1, 100);
-- Create HNSW index with cosine distance
CREATE INDEX test_vectors_cosine_idx ON test_vectors_cosine
USING hnsw (embedding hnsw_cosine_ops);
-- Query with cosine distance
SELECT id, embedding <=> ARRAY[1.0, 0.0, 0.0]::real[] AS cosine_dist
FROM test_vectors_cosine
ORDER BY embedding <=> ARRAY[1.0, 0.0, 0.0]::real[]
LIMIT 5;
-- ============================================================================
-- Test 5: Inner Product Index
-- ============================================================================
\echo '=== Test 5: Inner Product Index ==='
CREATE TABLE test_vectors_ip (
id SERIAL PRIMARY KEY,
embedding real[]
);
-- Insert test vectors
INSERT INTO test_vectors_ip (embedding)
SELECT ARRAY[random() * 10, random() * 10, random() * 10]::real[]
FROM generate_series(1, 100);
-- Create HNSW index with inner product
CREATE INDEX test_vectors_ip_idx ON test_vectors_ip
USING hnsw (embedding hnsw_ip_ops);
-- Query with inner product (finds vectors with largest inner product)
SELECT id, embedding <#> ARRAY[1.0, 1.0, 1.0]::real[] AS neg_ip
FROM test_vectors_ip
ORDER BY embedding <#> ARRAY[1.0, 1.0, 1.0]::real[]
LIMIT 5;
-- ============================================================================
-- Test 6: High-Dimensional Vectors
-- ============================================================================
\echo '=== Test 6: High-Dimensional Vectors (128D) ==='
CREATE TABLE test_vectors_high_dim (
id SERIAL PRIMARY KEY,
embedding real[]
);
-- Insert 128-dimensional vectors
INSERT INTO test_vectors_high_dim (embedding)
SELECT array_agg(random())::real[]
FROM generate_series(1, 500),
generate_series(1, 128)
GROUP BY 1;
-- Create HNSW index
CREATE INDEX test_vectors_high_dim_idx ON test_vectors_high_dim
USING hnsw (embedding hnsw_l2_ops)
WITH (m = 16, ef_construction = 64);
-- Query 128D vectors
\set query_vec 'SELECT array_agg(random())::real[] FROM generate_series(1, 128)'
SELECT id, embedding <-> (:query_vec) AS distance
FROM test_vectors_high_dim
ORDER BY embedding <-> (:query_vec)
LIMIT 5;
-- ============================================================================
-- Test 7: Index Maintenance
-- ============================================================================
\echo '=== Test 7: Index Maintenance ==='
-- Get memory statistics
SELECT ruvector_memory_stats();
-- Perform index maintenance
SELECT ruvector_index_maintenance('test_vectors_hnsw_l2_idx');
-- Check index size
SELECT
indexname,
pg_size_pretty(pg_relation_size(indexname::regclass)) AS index_size
FROM pg_indexes
WHERE tablename LIKE 'test_vectors%';
-- ============================================================================
-- Test 8: Insert/Delete Operations
-- ============================================================================
\echo '=== Test 8: Insert and Delete Operations ==='
-- Insert new vectors
INSERT INTO test_vectors (embedding)
SELECT ARRAY[random(), random(), random()]::real[]
FROM generate_series(1, 100);
-- Query after insert
SELECT COUNT(*) FROM test_vectors;
-- Delete some vectors
DELETE FROM test_vectors WHERE id % 2 = 0;
-- Query after delete
SELECT COUNT(*) FROM test_vectors;
-- Verify index still works
SELECT id, embedding <-> ARRAY[0.5, 0.5, 0.5]::real[] AS distance
FROM test_vectors
ORDER BY embedding <-> ARRAY[0.5, 0.5, 0.5]::real[]
LIMIT 5;
-- ============================================================================
-- Test 9: Query Plan Analysis
-- ============================================================================
\echo '=== Test 9: Query Plan Analysis ==='
-- Explain query plan for HNSW index scan
EXPLAIN (ANALYZE, BUFFERS)
SELECT id, embedding <-> ARRAY[0.5, 0.5, 0.5]::real[] AS distance
FROM test_vectors_opts
ORDER BY embedding <-> ARRAY[0.5, 0.5, 0.5]::real[]
LIMIT 10;
-- ============================================================================
-- Test 10: Session Parameter Testing
-- ============================================================================
\echo '=== Test 10: Session Parameter Testing ==='
-- Show current ef_search setting
SHOW ruvector.ef_search;
-- Increase ef_search for better recall
SET ruvector.ef_search = 100;
-- Run query with increased ef_search
SELECT id, embedding <-> ARRAY[0.5, 0.5, 0.5]::real[] AS distance
FROM test_vectors_opts
ORDER BY embedding <-> ARRAY[0.5, 0.5, 0.5]::real[]
LIMIT 10;
-- Reset to default
RESET ruvector.ef_search;
-- ============================================================================
-- Test 11: Operator Functionality
-- ============================================================================
\echo '=== Test 11: Distance Operator Tests ==='
-- Test L2 distance operator
SELECT
ARRAY[1.0, 2.0, 3.0]::real[] <-> ARRAY[4.0, 5.0, 6.0]::real[] AS l2_dist;
-- Test cosine distance operator
SELECT
ARRAY[1.0, 0.0, 0.0]::real[] <=> ARRAY[0.0, 1.0, 0.0]::real[] AS cosine_dist;
-- Test inner product operator
SELECT
ARRAY[1.0, 2.0, 3.0]::real[] <#> ARRAY[4.0, 5.0, 6.0]::real[] AS neg_ip;
-- ============================================================================
-- Test 12: Edge Cases
-- ============================================================================
\echo '=== Test 12: Edge Cases ==='
-- Empty result set
SELECT id, embedding <-> ARRAY[100.0, 100.0, 100.0]::real[] AS distance
FROM test_vectors
WHERE id < 0 -- No results
ORDER BY embedding <-> ARRAY[100.0, 100.0, 100.0]::real[]
LIMIT 5;
-- Single vector table
CREATE TABLE test_single_vector (
id SERIAL PRIMARY KEY,
embedding real[]
);
INSERT INTO test_single_vector (embedding) VALUES (ARRAY[1.0, 2.0, 3.0]::real[]);
CREATE INDEX test_single_vector_idx ON test_single_vector
USING hnsw (embedding hnsw_l2_ops);
SELECT * FROM test_single_vector
ORDER BY embedding <-> ARRAY[0.0, 0.0, 0.0]::real[]
LIMIT 5;
-- ============================================================================
-- Cleanup
-- ============================================================================
\echo '=== Cleanup ==='
DROP TABLE IF EXISTS test_vectors CASCADE;
DROP TABLE IF EXISTS test_vectors_opts CASCADE;
DROP TABLE IF EXISTS test_vectors_cosine CASCADE;
DROP TABLE IF EXISTS test_vectors_ip CASCADE;
DROP TABLE IF EXISTS test_vectors_high_dim CASCADE;
DROP TABLE IF EXISTS test_single_vector CASCADE;
\echo '=== All tests completed successfully ==='