diff --git a/Dockerfile.xpu b/Dockerfile.xpu
new file mode 100644
index 0000000..bb4d2dd
--- /dev/null
+++ b/Dockerfile.xpu
@@ -0,0 +1,68 @@
+# Base image
+FROM intel/oneapi-basekit:2025.0.1-0-devel-ubuntu22.04
+
+ARG http_proxy
+ARG https_proxy
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV CONDA_DIR=/opt/conda
+
+# Install dependencies
+RUN apt-get update && apt-get install -y \
+    wget \
+    curl \
+    bash \
+    git \
+    vim \
+    ca-certificates \
+    binutils \
+    cmake \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Miniforge
+RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O /tmp/miniforge.sh && \
+    bash /tmp/miniforge.sh -b -p $CONDA_DIR && \
+    rm /tmp/miniforge.sh && \
+    $CONDA_DIR/bin/conda clean -afy
+
+# Add conda to PATH
+ENV PATH=$CONDA_DIR/bin:$PATH
+
+RUN bash -c "\
+    source /opt/conda/etc/profile.d/conda.sh && \
+    conda create --name ktransformers python=3.11 -y && \
+    conda activate ktransformers && \
+    conda env list && \
+    conda install -c conda-forge libstdcxx-ng -y && \
+    strings \$(find /opt/conda/envs/ktransformers/lib -name 'libstdc++.so.6') | grep GLIBCXX | grep 3.4.32 \
+"
+
+RUN bash -c "\
+    source /opt/conda/etc/profile.d/conda.sh && \
+    conda activate ktransformers && \
+    pip install ipex-llm[xpu_2.6]==2.3.0b20250518 --extra-index-url https://download.pytorch.org/whl/xpu && \
+    pip uninstall -y torch torchvision torchaudio && \
+    pip install torch==2.7+xpu torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu && \
+    pip uninstall -y intel-opencl-rt dpcpp-cpp-rt && \
+    pip list \
+"
+
+# Clone and set up ktransformers repo (the sed forces setup.py's torch.xpu.is_available() check to pass, since no GPU is visible at image-build time)
+RUN bash -c "\
+    source $CONDA_DIR/etc/profile.d/conda.sh && \
+    conda activate ktransformers && \
+    git clone https://github.com/kvcache-ai/ktransformers.git && \
+    cd ktransformers && \
+    git submodule update --init && \
+    sed -i 's/torch\.xpu\.is_available()/True/g' setup.py && \
+    bash install.sh --dev xpu \
+"
+
+# Init conda and prepare bashrc
+RUN conda init bash && \
+    echo "source $CONDA_DIR/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate ktransformers" >> ~/.bashrc
+
+WORKDIR /ktransformers/
+CMD ["bash"]
diff --git a/doc/en/Docker_xpu.md b/doc/en/Docker_xpu.md
new file mode 100644
index 0000000..cb92d01
--- /dev/null
+++ b/doc/en/Docker_xpu.md
@@ -0,0 +1,94 @@
+# Intel GPU Docker Guide (Beta)
+
+## Prerequisites
+
+* Docker must be installed and running on your system.
+* Create a directory to store large models and intermediate files (e.g., `/mnt/models`).
+* **Before proceeding, ensure the Intel GPU driver is installed correctly on your host:** [Installation Guide](./xpu.md#1-install-intel-gpu-driver)
+
+---
+
+## Building the Docker Image Locally
+
+1. Clone the repository and navigate to the project directory:
+
+   ```bash
+   git clone https://github.com/kvcache-ai/ktransformers.git
+   cd ktransformers
+   ```
+
+2. Build the Docker image using the XPU-specific [Dockerfile.xpu](../../Dockerfile.xpu):
+
+   ```bash
+   sudo http_proxy=$HTTP_PROXY \
+     https_proxy=$HTTPS_PROXY \
+     docker build \
+     --build-arg http_proxy=$HTTP_PROXY \
+     --build-arg https_proxy=$HTTPS_PROXY \
+     -t kt_xpu:0.3.1 \
+     -f Dockerfile.xpu \
+     .
+   ```
+
+---
+
+## Running the Container
+
+### 1. Start the container
+
+```bash
+sudo docker run -td --privileged \
+    --net=host \
+    --device=/dev/dri \
+    --shm-size="16g" \
+    -v /path/to/models:/models \
+    -e http_proxy=$HTTP_PROXY \
+    -e https_proxy=$HTTPS_PROXY \
+    --name ktransformers_xpu \
+    kt_xpu:0.3.1
+```
+
+**Note**: Replace `/path/to/models` with your model directory on the host (e.g., `/mnt/models`); it is mounted as `/models` inside the container.
+
+---
+
+### 2. Access the container
+
+```bash
+sudo docker exec -it ktransformers_xpu /bin/bash
+```
+
+---
+
+### 3. Set required XPU environment variables (inside the container)
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export ONEAPI_DEVICE_SELECTOR=level_zero:0
+export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+```
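+
+Optionally, verify that PyTorch can see the XPU before going further (a quick sanity check; `torch.xpu` ships with the PyTorch XPU build installed in the image, and with `ONEAPI_DEVICE_SELECTOR=level_zero:0` exactly one device should be visible):
+
+```bash
+# prints device availability and the number of visible XPU devices, e.g. "True 1"
+python -c "import torch; print(torch.xpu.is_available(), torch.xpu.device_count())"
+```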
+
+---
+
+### 4. Run the sample script
+
+```bash
+python ktransformers/local_chat.py \
+    --model_path deepseek-ai/DeepSeek-R1 \
+    --gguf_path <path_to_gguf_files> \
+    --optimize_config_path ktransformers/optimize/optimize_rules/xpu/DeepSeek-V3-Chat.yaml \
+    --cpu_infer <cpu_infer> \
+    --device xpu \
+    --max_new_tokens 200
+```
+
+**Note**:
+
+* Replace `<path_to_gguf_files>` with the path to your GGUF model files.
+* Replace `<cpu_infer>` with the number of CPU cores you want to use plus one.
+
+---
+
+## Additional Information
+
+For more configuration options and usage details, refer to the [project README](../../README.md). To run KTransformers natively on XPU (outside of Docker), please refer to [xpu.md](./xpu.md).
diff --git a/doc/en/xpu.md b/doc/en/xpu.md
index e8b3b90..78a1923 100644
--- a/doc/en/xpu.md
+++ b/doc/en/xpu.md
@@ -129,3 +129,6 @@ Ensure you have permissions to access /dev/dri/renderD*. This typically requires
 sudo gpasswd -a ${USER} render
 newgrp render
 ```
+
+## Additional Information
+To run KTransformers on XPU with Docker, please refer to [Docker_xpu.md](./Docker_xpu.md).
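+
+If a Docker container cannot access the GPU, note that [Docker_xpu.md](./Docker_xpu.md) starts the container with `--device=/dev/dri`, so the host must expose working render nodes. A quick host-side sanity check (standard tools, nothing KTransformers-specific):
+
+```bash
+ls /dev/dri/              # renderD128 (or similar) should be present
+id -nG | grep -w render   # your user should be in the render group
+```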