Make the fine-tune (LLaMA-Factory) environment optional in the Docker image.

docker/Dockerfile gains a build argument FUNCTIONALITY (default: sft).
Every fine-tune step -- creating the fine-tune conda env, its pip mirror
config, the LLaMA-Factory clone, the ktransformers/flash_attn wheel
downloads and installs, the NCCL pin, the wheel cleanup and the `finetune`
shell alias -- now runs only when FUNCTIONALITY is exactly "sft". For any
other value versions.env records LLAMAFACTORY_VERSION=none.

The build scripts forward the value as ${FUNCTIONALITY:-sft}: an unset or
empty shell variable would otherwise be passed as an explicit empty
--build-arg and silently select the non-sft path.
docker-utils.sh no longer rejects an empty LLaMA-Factory version.

NOTE(review): indentation of context lines was reconstructed; confirm it
matches the target files (or apply with --ignore-whitespace). The index
lines carry the blob ids of the original patch.

diff --git a/docker/Dockerfile b/docker/Dockerfile
index a2f464f5..c20c1a76 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -201,31 +201,38 @@ ARG GITHUB_ARTIFACTORY
 ARG KTRANSFORMERS_VERSION
 ARG KTRANSFORMERS_WHEEL
 ARG FLASH_ATTN_WHEEL
+ARG FUNCTIONALITY=sft
 
 WORKDIR /workspace
 
-# Create two conda environments with Python 3.12
+# Create conda environments (fine-tune only needed for sft mode)
 RUN conda create -n serve python=3.12 -y \
-    && conda create -n fine-tune python=3.12 -y
+    && if [ "$FUNCTIONALITY" = "sft" ]; then conda create -n fine-tune python=3.12 -y; fi
 
-# Set pip mirror for both conda envs
+# Set pip mirror for conda envs
 RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
-    && /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple; \
+    fi
 
 # Clone repositories
 # Use kvcache-ai/sglang fork with kimi_k2 branch
 RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \
     && cd /workspace/sglang && git checkout kimi_k2
 
-RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory \
-    && git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
-    && cd /workspace/ktransformers && git submodule update --init --recursive
+RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
+    && cd /workspace/ktransformers && git submodule update --init --recursive \
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory; \
+    fi
 
-# Download ktransformers wheel and flash_attn wheel for fine-tune env
-RUN curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
-    https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
-    && curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
-    https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}
+# Download ktransformers wheel and flash_attn wheel for fine-tune env (sft mode only)
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
+            https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
+        && curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
+            https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}; \
+    fi
 
 ########################################################
 # Environment 1: serve (sglang + kt-kernel)
@@ -318,47 +325,59 @@ RUN . /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve \
     && CPUINFER_BUILD_ALL_VARIANTS=1 ./install.sh build
 
 ########################################################
-# Environment 2: fine-tune (LLaMA-Factory + ktransformers)
+# Environment 2: fine-tune (LLaMA-Factory + ktransformers) - sft mode only
 ########################################################
 
 # Install dependency libraries for ktransformers (CUDA 11.8 runtime required)
-RUN conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
-    && conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
+        && conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime; \
+    fi
 
 # Install PyTorch 2.8 in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    case "$CUDA_VERSION" in \
-        12.6.1) CUINDEX=126 ;; \
-        12.8.1) CUINDEX=128 ;; \
-        12.9.1) CUINDEX=129 ;; \
-        13.0.1) CUINDEX=130 ;; \
-    esac \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install \
-        torch==2.8.0 \
-        torchvision \
-        torchaudio \
-        --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        case "$CUDA_VERSION" in \
+            12.6.1) CUINDEX=126 ;; \
+            12.8.1) CUINDEX=128 ;; \
+            12.9.1) CUINDEX=129 ;; \
+            13.0.1) CUINDEX=130 ;; \
+        esac \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel hatchling \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install \
+            torch==2.8.0 \
+            torchvision \
+            torchaudio \
+            --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}; \
+    fi
 
 # Install LLaMA-Factory in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    cd /workspace/LLaMA-Factory \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        cd /workspace/LLaMA-Factory \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation; \
+    fi
 
 # Install ktransformers wheel in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}; \
+    fi
 
 # Install flash_attn wheel in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}; \
+    fi
 
 # Install NCCL for fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
-        /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
-    elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
-        /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
+            /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
+        elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
+            /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
+        fi; \
     fi
 
 ########################################################
@@ -366,13 +385,18 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 ########################################################
 
 # Clean up downloaded wheels
-RUN rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}; \
+    fi
 
 # Initialize conda for bash
 RUN /opt/miniconda3/bin/conda init bash
 
 # Create shell aliases for convenience
-RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"\nalias finetune="conda activate fine-tune"' >> /root/.bashrc
+RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"' >> /root/.bashrc \
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        echo 'alias finetune="conda activate fine-tune"' >> /root/.bashrc; \
+    fi
 
 ########################################################
 # Extract version information for image naming
@@ -392,12 +416,17 @@ RUN set -x && \
     echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \
     echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \
     \
-    # LLaMA-Factory version (from fine-tune environment)
-    . /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
-    cd /workspace/LLaMA-Factory && \
-    LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
-    echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
-    echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION" && \
+    # LLaMA-Factory version (from fine-tune environment, sft mode only)
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        . /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
+        cd /workspace/LLaMA-Factory && \
+        LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
+        echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
+        echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION"; \
+    else \
+        echo "LLAMAFACTORY_VERSION=none" >> /workspace/versions.env && \
+        echo "LLaMA-Factory not installed (infer mode)"; \
+    fi && \
     \
     # Display all versions
     echo "=== Version Summary ===" && \
diff --git a/docker/build-docker-tar.sh b/docker/build-docker-tar.sh
index 26730747..9a8bfa01 100755
--- a/docker/build-docker-tar.sh
+++ b/docker/build-docker-tar.sh
@@ -261,6 +261,7 @@ build_image() {
     build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
     build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
     build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
+    build_args+=("--build-arg" "FUNCTIONALITY=${FUNCTIONALITY:-sft}")
 
     # Add proxy settings if provided
     if [ -n "$HTTP_PROXY" ]; then
diff --git a/docker/docker-utils.sh b/docker/docker-utils.sh
index 988aac32..3277668a 100755
--- a/docker/docker-utils.sh
+++ b/docker/docker-utils.sh
@@ -183,7 +183,7 @@ generate_image_name() {
     llama_ver=$(echo "$versions" | grep "^LLAMAFACTORY_VERSION=" | cut -d= -f2)
 
     # Validate versions were extracted
-    if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ] || [ -z "$llama_ver" ]; then
+    if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ]; then
         log_error "Failed to parse versions from input"
         return 1
     fi
diff --git a/docker/push-to-dockerhub.sh b/docker/push-to-dockerhub.sh
index 8b8fc3e2..0a749ee7 100755
--- a/docker/push-to-dockerhub.sh
+++ b/docker/push-to-dockerhub.sh
@@ -313,6 +313,7 @@ build_image() {
     build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
     build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
     build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
+    build_args+=("--build-arg" "FUNCTIONALITY=${FUNCTIONALITY:-sft}")
 
     # Add proxy settings if provided
     if [ -n "$HTTP_PROXY" ]; then
@@ -884,6 +885,7 @@ build_image() {
     build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
     build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
     build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
+    build_args+=("--build-arg" "FUNCTIONALITY=${FUNCTIONALITY:-sft}")
 
     # Add proxy settings if provided
     if [ -n "$HTTP_PROXY" ]; then