# syntax=docker/dockerfile:1

# Multi-stage Dockerfile for ruvector-postgres extension
# Builds the extension and creates a PostgreSQL image with it installed
# v2.0.1: Extension version aligned with Docker image tag (fixes #271)
#
# The syntax directive above must remain the very first line of the file:
# a later RUN step uses a heredoc, which needs the BuildKit Dockerfile frontend.

# Build stage
# Using nightly Rust to support edition2024 crates in the registry
FROM rustlang/rust:nightly-bookworm-slim AS builder

# Install the build toolchain plus the tools needed to register the PGDG
# apt repository (curl, ca-certificates, gnupg). PostgreSQL itself is
# installed further down, once that repository has been added.
RUN apt-get update && apt-get install -y \
    bison \
    build-essential \
    ca-certificates \
    clang \
    curl \
    flex \
    gnupg \
    libclang-dev \
    libssl-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Add PostgreSQL official apt repository.
# The key is downloaded to a file first instead of being piped into gpg:
# under the default /bin/sh (no pipefail) a failed curl inside a pipe would
# be masked by gpg's exit status. The repository is addressed over HTTPS.
RUN curl -fsSL -o /tmp/pgdg-key.asc https://www.postgresql.org/media/keys/ACCC4CF8.asc \
    && gpg --dearmor -o /usr/share/keyrings/postgresql-keyring.gpg /tmp/pgdg-key.asc \
    && rm -f /tmp/pgdg-key.asc \
    && echo "deb [signed-by=/usr/share/keyrings/postgresql-keyring.gpg] https://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list

# Install PostgreSQL 17 development packages
RUN apt-get update && apt-get install -y \
    postgresql-server-dev-17 \
    postgresql-17 \
    && rm -rf /var/lib/apt/lists/*

# Install cargo-pgrx (version pinned; --locked uses the crate's own lockfile)
RUN cargo install cargo-pgrx --version 0.12.9 --locked

# Set up workspace root — dependency crates use workspace inheritance
WORKDIR /workspace

# Create a minimal workspace Cargo.toml so dependency crates can resolve
# workspace inheritance (edition.workspace, version.workspace, etc.)
RUN cat > /workspace/Cargo.toml << 'WORKSPACE_EOF'
[workspace]
members = [
    "crates/ruvector-postgres",
    "crates/ruvector-solver",
    "crates/ruvector-math",
    "crates/ruvector-attention",
    "crates/sona",
    "crates/ruvector-domain-expansion",
    "crates/ruvector-mincut-gated-transformer",
]
resolver = "2"

[workspace.package]
version = "2.0.4"
edition = "2021"
rust-version = "1.77"
license = "MIT"
authors = ["Ruvector Team"]
repository = "https://github.com/ruvnet/ruvector"

[workspace.dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
rand = "0.8"
rand_distr = "0.4"
tracing = "0.1"
rayon = "1.10"
crossbeam = "0.8"
dashmap = "6.1"
parking_lot = "0.12"
once_cell = "1.20"
criterion = { version = "0.5", features = ["html_reports"] }
proptest = "1.5"
nalgebra = { version = "0.33", default-features = false, features = ["std"] }
ndarray = "0.16"
chrono = "0.4"
anyhow = "1.0"

[profile.release]
opt-level = 3
lto = "fat"
codegen-units = 1
strip = true
panic = "unwind"
WORKSPACE_EOF

# Copy ruvector-postgres source
COPY crates/ruvector-postgres/ /workspace/crates/ruvector-postgres/

# Copy dependency crates
COPY crates/ruvector-mincut-gated-transformer /workspace/crates/ruvector-mincut-gated-transformer/
COPY crates/ruvector-solver /workspace/crates/ruvector-solver/
COPY crates/ruvector-math /workspace/crates/ruvector-math/
COPY crates/ruvector-attention /workspace/crates/ruvector-attention/
COPY crates/sona /workspace/crates/sona/
COPY crates/ruvector-domain-expansion /workspace/crates/ruvector-domain-expansion/

# Copy rvf crates (path deps of ruvector-domain-expansion)
COPY crates/rvf/rvf-types /workspace/crates/rvf/rvf-types/
COPY crates/rvf/rvf-wire /workspace/crates/rvf/rvf-wire/
COPY crates/rvf/rvf-crypto /workspace/crates/rvf/rvf-crypto/

# Copy the workspace Cargo.lock to pin dependency versions
COPY Cargo.lock /workspace/Cargo.lock

WORKDIR /workspace/crates/ruvector-postgres

# Initialize pgrx with system PostgreSQL
RUN cargo pgrx init --pg17=/usr/lib/postgresql/17/bin/pg_config

# Use the git registry protocol instead of sparse (original rationale: avoid
# sparse-registry issues with edition2024 crates).
# This must be set BEFORE `cargo fetch`: cargo keeps a separate registry
# cache per protocol, so a prefetch done under a different protocol than the
# build would not warm the cache the build actually reads.
ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=git

# Pre-fetch dependencies to lock versions and warm the registry cache
RUN cargo fetch

# Build the extension with all features including v0.3 modules
# graph-complete includes: graph, hyperbolic, sparse
RUN cargo pgrx package --features "pg17 index-all quant-all graph-complete embeddings gated-transformer analytics-complete attention-extended sona-learning domain-expansion"

# Build the model downloader binary
RUN cargo build --release --bin download-models --features "embeddings"

# Set cache path and download embedding models
# FASTEMBED_CACHE_DIR is the correct env var for fastembed-rs
ENV FASTEMBED_CACHE_DIR=/opt/ruvector/models
RUN mkdir -p /opt/ruvector/models && \
    ./target/release/download-models && \
    echo "Model cache size: $(du -sh /opt/ruvector/models)" && \
    ls -la /opt/ruvector/models/

# Copy all SQL schema files (control file default_version=0.3.0 selects the right one)
# The trailing echo only reports a function count for the 0.3.0 script; it
# does not fail the build if that particular file is missing.
RUN for f in /workspace/crates/ruvector-postgres/sql/ruvector--*.sql; do \
        cp "$f" /workspace/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ ; \
    done && \
    echo "SQL schemas copied:" && \
    ls -1 /workspace/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--*.sql && \
    echo "v0.3.0 function count: $(grep -c 'CREATE FUNCTION\|CREATE OR REPLACE FUNCTION' /workspace/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.3.0.sql)"

# Verify the extension files are complete
RUN ls -la /workspace/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ && \
    echo "=== Extension control ===" && \
    cat /workspace/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector.control

# Runtime stage
FROM postgres:17-bookworm

# Labels (single instruction; values unchanged)
# NOTE(review): the version labels say 0.3.1 while the generated workspace
# manifest uses 2.0.4 and the file header mentions v2.0.1 — confirm which
# version the image is meant to advertise.
# NOTE(review): the maintainer value ends with a trailing space; a contact
# address may have been lost — confirm.
LABEL maintainer="ruvector team " \
      description="PostgreSQL with ruvector extension - high-performance vector database with 270+ SQL functions, Graph/Cypher/SPARQL, GNN, hybrid search, multi-tenancy, self-healing, SONA self-learning, and local embeddings" \
      version="0.3.1" \
      org.opencontainers.image.title="ruvector-postgres" \
      org.opencontainers.image.version="0.3.1" \
      org.opencontainers.image.vendor="ruv.io" \
      org.opencontainers.image.source="https://github.com/ruvnet/ruvector" \
      org.opencontainers.image.description="Drop-in pgvector replacement with SIMD, Flash Attention, GNN, Cypher, SPARQL, hybrid search, multi-tenancy, self-healing, and SONA"

# Set embedding model cache path - models are pre-downloaded during build
# FASTEMBED_CACHE_DIR is the correct env var for fastembed-rs
ENV FASTEMBED_CACHE_DIR=/opt/ruvector/models

# Copy pre-downloaded embedding models from builder
COPY --from=builder /opt/ruvector/models /opt/ruvector/models

# Copy the built extension from builder
# Note: In a workspace, target/ is at the workspace root /workspace/target/
COPY --from=builder /workspace/target/release/ruvector-pg17/usr/share/postgresql/17/extension/* \
     /usr/share/postgresql/17/extension/
COPY --from=builder /workspace/target/release/ruvector-pg17/usr/lib/postgresql/17/lib/* \
     /usr/lib/postgresql/17/lib/

# Add the initialization script (extension creation, role setup, and tests).
# No separate `mkdir` layer is needed: COPY creates any missing directories
# in the destination path.
COPY --from=builder /workspace/crates/ruvector-postgres/docker/init.sql /docker-entrypoint-initdb.d/01-init.sql

# Health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD pg_isready -U postgres || exit 1

# Expose PostgreSQL port
EXPOSE 5432

# Use the default PostgreSQL entrypoint
CMD ["postgres"]