ruvector/scripts/training/Dockerfile
rUv 3dc7753473 refactor(training): use ruvllm-native tooling instead of llama.cpp
- Rewrite run_calibration.py to use gguf Python package + llama-cpp-python
  prebuilt wheels instead of compiling llama.cpp from source
- Simplify Dockerfile: single-stage, pip install only, no CUDA compilation
  (build time: ~5min vs 20+min)
- Update ADR-129 with tooling decision section explaining ruvllm-native choice
- Remove llama-imatrix and llama-quantize binary dependencies

Co-Authored-By: claude-flow <ruv@ruv.net>
2026-03-28 13:40:14 +00:00

40 lines
1.2 KiB
Docker

# RuvLTRA Training Pipeline
# Supports: LoRA SFT, DPO, imatrix calibration, GGUF conversion
# Target: Cloud Run Jobs with L4 GPU or Vertex AI
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG rather than ENV so the setting is visible to RUN steps in
# this stage but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Disable Python's stdout/stderr buffering for every process in the container.
ENV PYTHONUNBUFFERED=1
# Install Python 3.11 with pip, git and curl, then register python3.11 as the
# target of both `python` and `python3` through update-alternatives.
# The apt package lists are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
        python3-pip \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \
    && rm -rf /var/lib/apt/lists/*
# Install Python dependencies (prebuilt wheels, no CUDA compilation needed).
# Every requirement is quoted: shell-form RUN executes under /bin/sh, where an
# unquoted `>` is an output redirection. Written bare, `pkg>=1.0` installs an
# unconstrained `pkg` and sends pip's output to a file named `=1.0`, so the
# minimum-version floors would be silently ignored.
RUN pip install --no-cache-dir \
    "torch==2.3.1" \
    "transformers>=4.44.0" \
    "peft>=0.12.0" \
    "trl>=0.9.0" \
    "datasets>=2.20.0" \
    "huggingface_hub>=0.24.0" \
    "accelerate>=0.33.0" \
    "bitsandbytes>=0.43.0" \
    sentencepiece \
    protobuf \
    safetensors
# Install llama-cpp-python with CUDA support from the project's prebuilt wheel
# index (cu124 matches the CUDA 12.4 base image).
# - The requirement is quoted so /bin/sh does not treat `>` as a redirection,
#   which would drop the version floor and write pip's output to `=0.2.80`.
# - --only-binary restricts this one package to wheels, so the build fails fast
#   instead of falling back to a source build. A source build cannot succeed
#   here: the runtime base image has no CUDA compiler and no C/C++ toolchain is
#   installed, which is also why CMAKE_ARGS is not set.
RUN pip install --no-cache-dir \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 \
    --only-binary=llama-cpp-python \
    "llama-cpp-python>=0.2.80"
# Install the `gguf` Python package with pip (no source build needed)
RUN pip install \
    --no-cache-dir \
    gguf
# The application lives in /app; the whole build context is copied into it.
WORKDIR /app
COPY . .

# Default invocation is `python -u run_calibration.py`. Arguments supplied at
# run time replace CMD only, so another script can be selected without
# overriding the entrypoint.
ENTRYPOINT ["python", "-u"]
CMD ["run_calibration.py"]