mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-24 13:44:06 +00:00
* docker: add OCI image labels to all published images * docker: propagate OCI labels as manifest and index annotations * docker: drop hardcoded org URL and revert accidental intel version bump The OCI image url and source are now driven by build args with a sensible default. The workflow passes the actual repository url so fork builds get labels pointing at the fork instead of upstream. Also restores the IGC, compute runtime, and IGDGMM versions in the intel Dockerfile labeled stage which I accidentally bumped in the first commit. * docker: add skip_s390x workflow_dispatch input for fast test runs Lets maintainers and PR authors trigger the docker workflow without the s390x build target, which depends on the IBM Z runner and is by far the slowest job in the matrix. The flag filters the s390x row out of the build matrix before merge_matrix is derived, so the merge job sees a consistent shape too. Signed-off-by: Samaresh Kumar Singh <ssam3003@gmail.com> --------- Signed-off-by: Samaresh Kumar Singh <ssam3003@gmail.com>
114 lines
3.2 KiB
Docker
114 lines
3.2 KiB
Docker
# Ubuntu release used to select the CUDA base image tags below.
ARG UBUNTU_VERSION="24.04"
# CUDA release; this should generally match the container host's environment.
ARG CUDA_VERSION="12.8.1"

# CUDA "devel" image: base of the compile stage.
ARG BASE_CUDA_DEV_CONTAINER="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
# CUDA "runtime" image: base of the runtime stages.
ARG BASE_CUDA_RUN_CONTAINER="nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}"

# Build metadata; each value stays "N/A" unless overridden with --build-arg.
ARG BUILD_DATE="N/A"
ARG APP_VERSION="N/A"
ARG APP_REVISION="N/A"
|
|
|
|
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture list to compile for. With the value "default" no
# -DCMAKE_CUDA_ARCHITECTURES override is passed to CMake.
ARG CUDA_DOCKER_ARCH=default

# Compiler toolchain and build prerequisites.
RUN apt-get update \
    && apt-get install -y \
        gcc-14 \
        g++-14 \
        build-essential \
        cmake \
        python3 \
        python3-pip \
        git \
        libssl-dev \
        libgomp1

# Use GCC 14 for C, C++ and as the CUDA host compiler.
ENV CC=gcc-14 \
    CXX=g++-14 \
    CUDAHOSTCXX=g++-14

WORKDIR /app

COPY . .

# Configure and compile. CMAKE_ARGS is only exported when a specific CUDA
# architecture was requested; otherwise it expands to nothing below.
RUN { [ "${CUDA_DOCKER_ARCH}" = "default" ] \
        || export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; } \
    && cmake -B build \
        -DGGML_NATIVE=OFF \
        -DGGML_CUDA=ON \
        -DGGML_BACKEND_DL=ON \
        -DGGML_CPU_ALL_VARIANTS=ON \
        -DLLAMA_BUILD_TESTS=OFF \
        ${CMAKE_ARGS} \
        -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
        . \
    && cmake --build build --config Release -j"$(nproc)"

# Gather every shared object from the build tree into /app/lib
# (cp -P copies symlinks as symlinks instead of following them).
RUN mkdir -p /app/lib \
    && find build -name '*.so*' -exec cp -P {} /app/lib \;

# Stage the binaries, Python scripts, requirements and the tools.sh
# entrypoint script under /app/full.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py requirements.txt /app/full \
    && cp -r gguf-py requirements /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
|
|
|
|
## Base image
# Runtime base that the full, light and server stages derive from: CUDA runtime
# image plus OCI labels, libgomp1/curl and the shared libraries built in the
# build stage.
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

# Build metadata surfaced as OCI labels; each value can be overridden with
# --build-arg. IMAGE_URL / IMAGE_SOURCE let a fork point the labels at itself.
ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp

# Values are quoted so that a build arg containing whitespace stays one value.
LABEL org.opencontainers.image.created="${BUILD_DATE}" \
      org.opencontainers.image.version="${APP_VERSION}" \
      org.opencontainers.image.revision="${APP_REVISION}" \
      org.opencontainers.image.title="llama.cpp" \
      org.opencontainers.image.description="LLM inference in C/C++" \
      org.opencontainers.image.url="${IMAGE_URL}" \
      org.opencontainers.image.source="${IMAGE_SOURCE}"

# Install runtime dependencies and clean up in the same layer so the removed
# files never reach the image. apt-get is used throughout: the plain `apt`
# front-end is meant for interactive use and warns that its CLI is not stable.
RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

# Shared libraries collected by the build stage; trailing slash marks /app as a directory.
COPY --from=build /app/lib/ /app/
|
|
|
|
### Full
# Full image: everything staged under /app/full by the build stage plus the
# Python packages listed in requirements.txt; starts via tools.sh.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# Install Python tooling and the project's Python requirements, then clean up
# in the same layer.
#  - apt-get replaces `apt`, whose CLI is intended for interactive use.
#  - --no-cache-dir stops pip from leaving its download/wheel cache in the
#    image; the /var/cache sweep below does not cover pip's cache directory.
RUN apt-get update \
    && apt-get install -y \
        git \
        python3 \
        python3-pip \
        python3-wheel \
    && pip install --no-cache-dir --break-system-packages --upgrade setuptools \
    && pip install --no-cache-dir --break-system-packages -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]
|
|
|
|
### Light, CLI only
# Light image: only the llama-cli and llama-completion binaries on top of the
# base stage; llama-cli is the entrypoint.
FROM base AS light

# With more than one source the destination must be a directory, so it is
# written with a trailing slash.
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app/

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]
|
|
|
|
### Server, Server only
# Server image: only the llama-server binary on top of the base stage.
FROM base AS server

# Sets the server host to 0.0.0.0 through the environment.
ENV LLAMA_ARG_HOST=0.0.0.0

# Trailing slash: always copy the binary *into* /app, never onto a file named /app.
COPY --from=build /app/full/llama-server /app/

WORKDIR /app

# Probe the /health endpoint with curl (installed in the base stage).
#  - Shell form so the port can come from the container environment; it falls
#    back to 8080, the port the probe used before.
#    NOTE(review): assumes llama-server reads LLAMA_ARG_PORT the same way it
#    reads LLAMA_ARG_HOST - confirm. A port given only on the command line is
#    not visible to this probe.
#  - `|| exit 1` maps every curl failure code to 1 (unhealthy); exit code 2 is
#    reserved by Docker for health checks.
HEALTHCHECK CMD curl -f "http://localhost:${LLAMA_ARG_PORT:-8080}/health" || exit 1

ENTRYPOINT [ "/app/llama-server" ]
|