mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-27 00:14:49 +00:00
* docker: add OCI image labels to all published images * docker: propagate OCI labels as manifest and index annotations * docker: drop hardcoded org URL and revert accidental intel version bump The OCI image url and source are now driven by build args with a sensible default. The workflow passes the actual repository url so fork builds get labels pointing at the fork instead of upstream. Also restores the IGC, compute runtime, and IGDGMM versions in the intel Dockerfile labeled stage which I accidentally bumped in the first commit. * docker: add skip_s390x workflow_dispatch input for fast test runs Lets maintainers and PR authors trigger the docker workflow without the s390x build target, which depends on the IBM Z runner and is by far the slowest job in the matrix. The flag filters the s390x row out of the build matrix before merge_matrix is derived, so the merge job sees a consistent shape too. Signed-off-by: Samaresh Kumar Singh <ssam3003@gmail.com> --------- Signed-off-by: Samaresh Kumar Singh <ssam3003@gmail.com>
130 lines
3.7 KiB
Docker
130 lines
3.7 KiB
Docker
# Ubuntu release used to pick the tag of the ROCm development image.
ARG UBUNTU_VERSION="24.04"

# ROCm / AMDGPU versions. These generally need to match the environment of
# the host that runs the container.
ARG ROCM_VERSION="7.2.1"
ARG AMDGPU_VERSION="7.2.1"

# ROCm image that the FROM instructions in this file build upon.
ARG BASE_ROCM_DEV_CONTAINER="rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete"

# Build metadata; each value is "N/A" unless supplied with --build-arg.
ARG APP_REVISION="N/A"
ARG APP_VERSION="N/A"
ARG BUILD_DATE="N/A"
|
|
|
|
### Build stage: compiles the project on top of the ROCm development image
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# GPU architectures to compile for. Unless overridden this is a "fat" build;
# the list is mostly tied to the architectures supported by rocBLAS.
# References:
#   https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.2.1/reference/system-requirements.html
#   https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityrad/native_linux/native_linux_compatibility.html
#   https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityryz/native_linux/native_linux_compatibility.html
ARG ROCM_DOCKER_ARCH="gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1150;gfx1200;gfx1201"

# Make the selected ROCm architectures available in the stage environment
ENV AMDGPU_TARGETS="${ROCM_DOCKER_ARCH}"
|
|
|
|
# Packages needed to configure and compile the project in this stage
RUN apt-get update && \
    apt-get install --yes \
        build-essential \
        cmake \
        curl \
        git \
        libgomp1 \
        libssl-dev
|
|
|
|
# Work from /app and place the entire build context there
WORKDIR /app
COPY . /app/
|
|
|
|
# Configure with the clang and HIP locations reported by hipconfig, then
# compile. HIPCXX and HIP_PATH are set for the configure command only.
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
        cmake -S . -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_BUILD_TESTS=OFF \
            -DGGML_HIP=ON \
            -DGGML_HIP_ROCWMMA_FATTN=ON \
            -DGGML_BACKEND_DL=ON \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DAMDGPU_TARGETS="${ROCM_DOCKER_ARCH}" \
    && cmake --build build --config Release --parallel "$(nproc)"
|
|
|
|
# Collect everything under build/ whose name matches *.so* into /app/lib.
# Symbolic links are copied as links rather than being followed.
RUN mkdir -p /app/lib && \
    find build -name '*.so*' -exec cp --no-dereference '{}' /app/lib ';'
|
|
|
|
# Assemble /app/full: built binaries, top-level Python scripts, the gguf-py
# and requirements directories, requirements.txt and the tools.sh wrapper.
# `set -e` aborts the step as soon as any command fails.
RUN set -e; \
    mkdir -p /app/full; \
    cp build/bin/* /app/full/; \
    cp *.py /app/full/; \
    cp --recursive gguf-py /app/full/; \
    cp --recursive requirements /app/full/; \
    cp requirements.txt /app/full/; \
    cp .devops/tools.sh /app/full/tools.sh
|
|
|
|
## Base stage: common layer that the full, light and server stages extend
FROM ${BASE_ROCM_DEV_CONTAINER} AS base

# Values written into the OCI labels below; override them with --build-arg.
ARG APP_REVISION="N/A"
ARG APP_VERSION="N/A"
ARG BUILD_DATE="N/A"
ARG IMAGE_SOURCE="https://github.com/ggml-org/llama.cpp"
ARG IMAGE_URL="https://github.com/ggml-org/llama.cpp"

LABEL org.opencontainers.image.title="llama.cpp" \
      org.opencontainers.image.description="LLM inference in C/C++" \
      org.opencontainers.image.created="${BUILD_DATE}" \
      org.opencontainers.image.version="${APP_VERSION}" \
      org.opencontainers.image.revision="${APP_REVISION}" \
      org.opencontainers.image.url="${IMAGE_URL}" \
      org.opencontainers.image.source="${IMAGE_SOURCE}"
|
|
|
|
# Install the runtime packages and remove package-manager leftovers in the
# same layer, so the removed files never persist in the image.
# apt-get is used for every step: the `apt` front end does not offer a stable
# command-line interface for scripts and prints a warning when used in one.
RUN apt-get update \
    && apt-get install -y \
        curl \
        libgomp1 \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

# Shared libraries gathered into /app/lib by the build stage
COPY --from=build /app/lib/ /app/
|
|
|
|
### Full image: everything the build stage assembled under /app/full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# Install Python and the project's Python requirements, then clean up in the
# same layer.
# --no-cache-dir keeps pip's download cache out of the image; the cleanup
# below only covers /tmp, /var/tmp and /var/cache, not pip's cache directory.
# apt-get is used instead of `apt`, whose command-line interface is not
# stable for scripts.
RUN apt-get update \
    && apt-get install -y \
        git \
        python3 \
        python3-pip \
        python3-wheel \
    && pip install --no-cache-dir --break-system-packages --upgrade setuptools \
    && pip install --no-cache-dir --break-system-packages -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]
|
|
|
|
### Light image: only the llama-cli and llama-completion binaries
FROM base AS light

# With more than one source, the COPY destination must be a directory and end
# with a trailing slash; the legacy builder rejects the instruction otherwise.
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app/

WORKDIR /app

ENTRYPOINT ["/app/llama-cli"]
|
|
|
|
### Server image: only the llama-server binary
FROM base AS server

# Host address exported to the container environment.
# NOTE(review): presumably read by llama-server as its listen address - confirm.
ENV LLAMA_ARG_HOST="0.0.0.0"

COPY --from=build /app/full/llama-server /app/

WORKDIR /app

# Probe the /health endpoint on port 8080; --fail makes curl exit non-zero on
# HTTP error responses.
HEALTHCHECK CMD ["curl", "--fail", "http://localhost:8080/health"]

ENTRYPOINT ["/app/llama-server"]
|