Merge pull request #1337 from liu-shaojun/docker_xpu

Add Dockerfile and usage guide for XPU support
aubreyli committed 2025-05-28 14:08:46 +08:00 via GitHub (commit ce75fcd7dd)
3 changed files with 165 additions and 0 deletions

Dockerfile.xpu (new file)

@@ -0,0 +1,68 @@
# Base image
FROM intel/oneapi-basekit:2025.0.1-0-devel-ubuntu22.04
ARG http_proxy
ARG https_proxy
ENV DEBIAN_FRONTEND=noninteractive
ENV CONDA_DIR=/opt/conda
# Install dependencies
RUN apt-get update && apt-get install -y \
wget \
curl \
bash \
git \
vim \
ca-certificates \
binutils \
cmake \
g++ \
&& rm -rf /var/lib/apt/lists/*
# Install Miniforge
RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O /tmp/miniforge.sh && \
bash /tmp/miniforge.sh -b -p $CONDA_DIR && \
rm /tmp/miniforge.sh && \
$CONDA_DIR/bin/conda clean -afy
# Add conda to PATH
ENV PATH=$CONDA_DIR/bin:$PATH
RUN bash -c "\
source /opt/conda/etc/profile.d/conda.sh && \
conda create --name ktransformers python=3.11 -y && \
conda activate ktransformers && \
conda env list && \
conda install -c conda-forge libstdcxx-ng -y && \
strings \$(find /opt/conda/envs/ktransformers/lib -name 'libstdc++.so.6') | grep GLIBCXX | grep 3.4.32 \
"
RUN bash -c "\
source /opt/conda/etc/profile.d/conda.sh && \
conda activate ktransformers && \
pip install ipex-llm[xpu_2.6]==2.3.0b20250518 --extra-index-url https://download.pytorch.org/whl/xpu && \
pip uninstall -y torch torchvision torchaudio && \
pip install torch==2.7+xpu torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu && \
pip uninstall -y intel-opencl-rt dpcpp-cpp-rt && \
pip list \
"
# Clone and build ktransformers; setup.py is patched because torch.xpu.is_available() returns False in the GPU-less build environment
RUN bash -c "\
source $CONDA_DIR/etc/profile.d/conda.sh && \
conda activate ktransformers && \
git clone https://github.com/kvcache-ai/ktransformers.git && \
cd ktransformers && \
git submodule update --init && \
sed -i 's/torch\.xpu\.is_available()/True/g' setup.py && \
bash install.sh --dev xpu \
"
# Init conda and prepare bashrc
RUN conda init bash && \
echo "source $CONDA_DIR/etc/profile.d/conda.sh" >> ~/.bashrc && \
echo "conda activate ktransformers" >> ~/.bashrc
WORKDIR /ktransformers/
CMD ["bash"]

doc/en/Docker_xpu.md (new file)

@@ -0,0 +1,94 @@
# Intel GPU Docker Guide (Beta)
## Prerequisites
* Docker must be installed and running on your system.
* Create a folder to store large models and intermediate files (e.g., `/mnt/models`).
* **Before proceeding, ensure the Intel GPU driver is installed correctly on your host** (a quick check follows this list): [Installation Guide](./xpu.md#1-install-intel-gpu-driver)
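A minimal sanity check that the driver is loaded and exposing a render node (device numbering varies by system):

```bash
# At least one renderD* entry should appear if the Intel GPU driver is active
ls -l /dev/dri
```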
---
## Building the Docker Image Locally
1. Clone the repository and navigate to the project directory:
```bash
git clone https://github.com/kvcache-ai/ktransformers.git
cd ktransformers
```
2. Build the Docker image using the XPU-specific [Dockerfile.xpu](../../Dockerfile.xpu):
```bash
sudo http_proxy=$HTTP_PROXY \
https_proxy=$HTTPS_PROXY \
docker build \
--build-arg http_proxy=$HTTP_PROXY \
--build-arg https_proxy=$HTTPS_PROXY \
-t kt_xpu:0.3.1 \
-f Dockerfile.xpu \
.
```
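If the build completes successfully, the image should now be listed locally:

```bash
sudo docker images kt_xpu
```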
---
## Running the Container
### 1. Start the container
```bash
sudo docker run -td --privileged \
--net=host \
--device=/dev/dri \
--shm-size="16g" \
-v /path/to/models:/models \
-e http_proxy=$HTTP_PROXY \
-e https_proxy=$HTTPS_PROXY \
--name ktransformers_xpu \
kt_xpu:0.3.1
```
**Note**: Replace `/path/to/models` with your actual model directory path (e.g., `/mnt/models`).
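To confirm the container started and is still running (the name matches `--name` above):

```bash
sudo docker ps --filter name=ktransformers_xpu
```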
---
### 2. Access the container
```bash
sudo docker exec -it ktransformers_xpu /bin/bash
```
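Once inside, you can optionally verify that the GPU is visible to the oneAPI runtime; the base image ships with `sycl-ls`, which should report at least one `level_zero:gpu` device (exact output depends on your driver and hardware):

```bash
sycl-ls
```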
---
### 3. Set required XPU environment variables (inside the container)
```bash
export SYCL_CACHE_PERSISTENT=1
export ONEAPI_DEVICE_SELECTOR=level_zero:0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
```
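These variables apply only to the current shell. If you want them set automatically in future `docker exec` sessions, one option (a convenience, not a requirement) is to append them to the container's `~/.bashrc`:

```bash
cat >> ~/.bashrc <<'EOF'
export SYCL_CACHE_PERSISTENT=1
export ONEAPI_DEVICE_SELECTOR=level_zero:0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
EOF
```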
---
### 4. Run the sample script
```bash
python ktransformers/local_chat.py \
--model_path deepseek-ai/DeepSeek-R1 \
--gguf_path <path_to_gguf_files> \
--optimize_config_path ktransformers/optimize/optimize_rules/xpu/DeepSeek-V3-Chat.yaml \
--cpu_infer <cpu_cores + 1> \
--device xpu \
--max_new_tokens 200
```
**Note**:
* Replace `<path_to_gguf_files>` with the path to your GGUF model files.
* Replace `<cpu_cores + 1>` with the number of CPU cores you want to use plus one.
---
## Additional Information
For more configuration options and usage details, refer to the [project README](../../README.md). To run KTransformers natively on XPU (outside of Docker), please refer to [xpu.md](./xpu.md).

doc/en/xpu.md

@@ -129,3 +129,6 @@ Ensure you have permissions to access /dev/dri/renderD*. This typically requires
sudo gpasswd -a ${USER} render
newgrp render
```
## Additional Information
To run KTransformers on XPU with Docker, please refer to [Docker_xpu.md](./Docker_xpu.md).