mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-23 12:45:01 +00:00
* docker: add OCI image labels to all published images * docker: propagate OCI labels as manifest and index annotations * docker: drop hardcoded org URL and revert accidental intel version bump The OCI image url and source are now driven by build args with a sensible default. The workflow passes the actual repository url so fork builds get labels pointing at the fork instead of upstream. Also restores the IGC, compute runtime, and IGDGMM versions in the intel Dockerfile labeled stage which I accidentally bumped in the first commit. * docker: add skip_s390x workflow_dispatch input for fast test runs Lets maintainers and PR authors trigger the docker workflow without the s390x build target, which depends on the IBM Z runner and is by far the slowest job in the matrix. The flag filters the s390x row out of the build matrix before merge_matrix is derived, so the merge job sees a consistent shape too. Signed-off-by: Samaresh Kumar Singh <ssam3003@gmail.com> --------- Signed-off-by: Samaresh Kumar Singh <ssam3003@gmail.com>
107 lines
2.8 KiB
Docker
107 lines
2.8 KiB
Docker
# Global build arguments, declared ahead of the first FROM.

# Tag of the ubuntu image that the build and base stages start from.
ARG UBUNTU_VERSION=24.04

# Image metadata values; each falls back to the placeholder "N/A".
ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A
|
|
|
|
# Build stage: compiles the project with GCC 14 and stages the artifacts that
# later stages copy out of it (/app/lib and /app/full).
FROM ubuntu:$UBUNTU_VERSION AS build

# Target architecture of the image being built; checked by the configure step.
ARG TARGETARCH

RUN apt-get update && \
    apt-get install -y gcc-14 g++-14 build-essential git cmake libssl-dev

# Select the GCC 14 compilers installed above for C and C++.
ENV CC=gcc-14 CXX=g++-14

WORKDIR /app

COPY . .

# Configure for amd64/arm64 only; any other TARGETARCH prints a message and
# fails the build. The compile step runs with one job per available CPU.
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
    else \
        echo "Unsupported architecture"; \
        exit 1; \
    fi && \
    cmake --build build -j "$(nproc)"

# Collect every shared library found under build/ into /app/lib.
# cp -P copies symbolic links as links instead of following them.
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Assemble the payload of the "full" image: built binaries, top-level Python
# scripts, gguf-py, the requirements files and the tools.sh script.
# The glob is anchored with ./ so a file name starting with '-' cannot be
# parsed by cp as an option.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp ./*.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
|
|
|
|
## Base image
# Common runtime layer: OCI labels, runtime packages and the shared libraries
# produced by the build stage.
FROM ubuntu:$UBUNTU_VERSION AS base

# Values for the OCI labels below. IMAGE_URL and IMAGE_SOURCE can be overridden
# with --build-arg so the labels point at a different repository.
ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp

LABEL org.opencontainers.image.created=$BUILD_DATE \
      org.opencontainers.image.version=$APP_VERSION \
      org.opencontainers.image.revision=$APP_REVISION \
      org.opencontainers.image.title="llama.cpp" \
      org.opencontainers.image.description="LLM inference in C/C++" \
      org.opencontainers.image.url=$IMAGE_URL \
      org.opencontainers.image.source=$IMAGE_SOURCE

# Install the runtime packages, then remove package caches and temporary files
# in the same layer so they do not persist in the image.
# apt-get is used throughout: the plain `apt` front end does not offer a stable
# command-line interface for scripted use.
RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

# Shared libraries gathered by the build stage.
COPY --from=build /app/lib/ /app
|
|
|
|
### Full
# Everything staged in /app/full by the build stage, plus Python, pip and the
# packages listed in requirements.txt.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# Install Python tooling and the project's Python requirements, then clean up
# in the same layer.
# --no-cache-dir stops pip from leaving its download cache in the image; the
# cleanup commands below only cover /tmp, /var/tmp and /var/cache.
# apt-get is used throughout: the plain `apt` front end does not offer a stable
# command-line interface for scripted use.
RUN apt-get update \
    && apt-get install -y \
        git \
        python3 \
        python3-pip \
        python3-wheel \
    && pip install --no-cache-dir --break-system-packages --upgrade setuptools \
    && pip install --no-cache-dir --break-system-packages -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]
|
|
|
|
### Light, CLI only
# Adds only the llama-cli and llama-completion binaries on top of base.
FROM base AS light

COPY --from=build \
     /app/full/llama-cli \
     /app/full/llama-completion \
     /app

WORKDIR /app

# The container runs llama-cli; arguments given to `docker run` are passed to it.
ENTRYPOINT ["/app/llama-cli"]
|
|
|
|
### Server, Server only
# Adds only the llama-server binary on top of base.
FROM base AS server

# NOTE(review): presumably read by llama-server as its listen address, so the
# server is reachable from outside the container — confirm against the server docs.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

# Probe the /health endpoint on port 8080; curl -f exits non-zero on an HTTP
# error status, which marks the probe as failed.
HEALTHCHECK CMD ["curl", "-f", "http://localhost:8080/health"]

ENTRYPOINT ["/app/llama-server"]
|