Merge pull request #1337 from liu-shaojun/docker_xpu

Add Dockerfile and usage guide for XPU support
2025-09-09 13:55:27 +00:00 · 2025-05-28 14:08:46 +08:00 · 2025-05-28 14:08:46 +08:00 · ce75fcd7dd
commit ce75fcd7dd
parent 0c44f2e211 404ad39a04
3 changed files with 165 additions and 0 deletions
--- a/Dockerfile.xpu
+++ b/Dockerfile.xpu
@ -0,0 +1,68 @@
 # Base image: Intel oneAPI Base Toolkit (devel tag) on Ubuntu 22.04.
 FROM intel/oneapi-basekit:2025.0.1-0-devel-ubuntu22.04
 # Proxy settings supplied via `docker build --build-arg ...`; build-time only.
 ARG http_proxy
 ARG https_proxy
 # Build-time only: stops apt-get from prompting during the build without
 # leaking the setting into the environment of running containers.
 ARG DEBIAN_FRONTEND=noninteractive
 # Install prefix for Miniforge; later build steps and PATH refer to it.
 ENV CONDA_DIR=/opt/conda
 # System packages: download tools, git, an editor and the C/C++ build tools.
 # The apt package index is deleted in the same layer so it never reaches the image.
 RUN apt-get update \
     && apt-get install -y \
         bash \
         binutils \
         ca-certificates \
         cmake \
         curl \
         g++ \
         git \
         vim \
         wget \
     && rm -rf /var/lib/apt/lists/*
 # Fetch the latest Miniforge installer, run it in batch mode (-b) with
 # $CONDA_DIR as the install prefix (-p), then remove the installer and clean
 # conda's caches within the same layer.
 RUN wget -O /tmp/miniforge.sh \
         https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh \
     && bash /tmp/miniforge.sh -b -p ${CONDA_DIR} \
     && rm /tmp/miniforge.sh \
     && ${CONDA_DIR}/bin/conda clean -afy
 # Make the conda executables take precedence on PATH.
 ENV PATH=${CONDA_DIR}/bin:${PATH}
 # Create the `ktransformers` conda environment (Python 3.11) and install
 # conda-forge's libstdcxx-ng into it. conda's caches are cleaned in this layer
 # so downloaded packages do not persist in the image. The closing
 # `strings | grep` pipeline is a build-time check: the step fails unless the
 # environment's libstdc++.so.6 contains a GLIBCXX 3.4.32 version string.
 # $CONDA_DIR is expanded by the outer shell; `\$(...)` is left for the inner bash.
 RUN bash -c "\
    source $CONDA_DIR/etc/profile.d/conda.sh && \
    conda create --name ktransformers python=3.11 -y && \
    conda activate ktransformers && \
    conda env list && \
    conda install -c conda-forge libstdcxx-ng -y && \
    conda clean -afy && \
    strings \$(find $CONDA_DIR/envs/ktransformers/lib -name 'libstdc++.so.6') | grep GLIBCXX | grep 3.4.32 \
 "
 # Install the XPU Python stack into the `ktransformers` environment:
 #   1. ipex-llm with the xpu_2.6 extra (quoted so bash cannot glob the brackets),
 #   2. swap the torch/torchvision/torchaudio it pulled in for torch 2.7 XPU
 #      wheels from the PyTorch test index,
 #   3. remove the pip-installed intel-opencl-rt and dpcpp-cpp-rt packages
 #      (presumably so the oneAPI runtime from the base image is used -- confirm).
 # --no-cache-dir keeps pip's download cache out of the image layer.
 # `pip list` only records the resulting package set in the build log.
 RUN bash -c "\
    source $CONDA_DIR/etc/profile.d/conda.sh && \
    conda activate ktransformers && \
    pip install --no-cache-dir 'ipex-llm[xpu_2.6]==2.3.0b20250518' --extra-index-url https://download.pytorch.org/whl/xpu && \
    pip uninstall -y torch torchvision torchaudio && \
    pip install --no-cache-dir torch==2.7+xpu torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu && \
    pip uninstall -y intel-opencl-rt dpcpp-cpp-rt && \
    pip list \
 "
 # Clone and set up the ktransformers repo inside the `ktransformers` conda env.
 # The clone is made relative to the current working directory; the final
 # WORKDIR (/ktransformers/) assumes that directory is `/` -- confirm against
 # the base image.
 # Before running install.sh, sed rewrites every `torch.xpu.is_available()` in
 # setup.py to `True` -- presumably because no XPU device is visible during
 # `docker build`; confirm.
 RUN bash -c "\
    source $CONDA_DIR/etc/profile.d/conda.sh && \
    conda activate ktransformers && \
    git clone https://github.com/kvcache-ai/ktransformers.git && \
    cd ktransformers && \
    git submodule update --init && \
    sed -i 's/torch\.xpu\.is_available()/True/g' setup.py && \
    bash install.sh --dev xpu \
 "
 # Interactive-shell setup: register conda with bash, then append two lines to
 # ~/.bashrc so each new shell sources conda.sh and activates `ktransformers`.
 RUN conda init bash \
     && printf '%s\n' \
         "source ${CONDA_DIR}/etc/profile.d/conda.sh" \
         "conda activate ktransformers" \
         >> ~/.bashrc
 # Default working directory and command for containers started from this image.
 WORKDIR /ktransformers/
 CMD ["bash"]
--- a/doc/en/Docker_xpu.md
+++ b/doc/en/Docker_xpu.md
@ -0,0 +1,94 @@
 # Intel GPU Docker Guide (Beta)
 ## Prerequisites
 * Docker must be installed and running on your system.
 * Create a folder on the host to store large models and intermediate files (e.g., `/mnt/models`).
 * **Before proceeding, ensure the Intel GPU driver is installed correctly on your host:** [Installation Guide](./xpu.md#1-install-intel-gpu-driver)
 ---
 ## Building the Docker Image Locally
 1. Clone the repository and navigate to the project directory:
   ```bash
   git clone https://github.com/kvcache-ai/ktransformers.git
   cd ktransformers
   ```
 2. Build the Docker image using the XPU-specific [Dockerfile.xpu](../../Dockerfile.xpu):
   ```bash
   sudo http_proxy=$HTTP_PROXY \
        https_proxy=$HTTPS_PROXY \
        docker build \
          --build-arg http_proxy=$HTTP_PROXY \
          --build-arg https_proxy=$HTTPS_PROXY \
          -t kt_xpu:0.3.1 \
          -f Dockerfile.xpu \
          .
   ```
 ---
 ## Running the Container
 ### 1. Start the container
 ```bash
 sudo docker run -td --privileged \
    --net=host \
    --device=/dev/dri \
    --shm-size="16g" \
    -v /path/to/models:/models \
    -e http_proxy=$HTTP_PROXY \
    -e https_proxy=$HTTPS_PROXY \
    --name ktransformers_xpu \
    kt_xpu:0.3.1
 ```
 **Note**: Replace `/path/to/models` with your actual model directory path (e.g., `/mnt/models`).
 ---
 ### 2. Access the container
 ```bash
 sudo docker exec -it ktransformers_xpu /bin/bash
 ```
 ---
 ### 3. Set required XPU environment variables (inside the container)
 ```bash
 export SYCL_CACHE_PERSISTENT=1
 export ONEAPI_DEVICE_SELECTOR=level_zero:0
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```
 ---
 ### 4. Run the sample script
 ```bash
 python ktransformers/local_chat.py \
  --model_path deepseek-ai/DeepSeek-R1 \
  --gguf_path <path_to_gguf_files> \
  --optimize_config_path ktransformers/optimize/optimize_rules/xpu/DeepSeek-V3-Chat.yaml \
  --cpu_infer <cpu_cores + 1> \
  --device xpu \
  --max_new_tokens 200
 ```
 **Note**:
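 * Replace `<path_to_gguf_files>` with the path to your GGUF model files as seen inside the container (the host model directory is mounted at `/models`).
 * Replace `<cpu_cores + 1>` with the number of CPU cores you want to use plus one (e.g., `33` to use 32 cores).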
 ---
 ## Additional Information
 For more configuration options and usage details, refer to the [project README](../../README.md). To run KTransformers natively on XPU (outside of Docker), please refer to [xpu.md](./xpu.md).
--- a/doc/en/xpu.md
+++ b/doc/en/xpu.md
@ -129,3 +129,6 @@ Ensure you have permissions to access /dev/dri/renderD*. This typically requires
 sudo gpasswd -a ${USER} render
 newgrp render
 ```
 ## Additional Information
 To run KTransformers on XPU with Docker, please refer to [Docker_xpu.md](./Docker_xpu.md).