From f3be33a313608e09469b8d614ae0c962d3f135c9 Mon Sep 17 00:00:00 2001
From: Alisehen <814073252@qq.com>
Date: Thu, 15 May 2025 06:39:02 +0000
Subject: [PATCH 1/5] add xpu parameters to install.sh

---
 doc/en/xpu.md |  4 +---
 install.sh    | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/doc/en/xpu.md b/doc/en/xpu.md
index 0691ce1..e120168 100644
--- a/doc/en/xpu.md
+++ b/doc/en/xpu.md
@@ -62,9 +62,7 @@ cd ktransformers
 git submodule update --init
 
 # Install dependencies
-bash install.sh
-pip uninstall triton pytorch-triton-xpu
-pip install pytorch-triton-xpu==3.3.0 --extra-index-url https://download.pytorch.org/whl/xpu # to avoid potential triton import error
+bash install.sh --dev xpu
 ```
 
 ## Running DeepSeek-R1 Models
diff --git a/install.sh b/install.sh
index 79f3166..260ae46 100644
--- a/install.sh
+++ b/install.sh
@@ -1,6 +1,20 @@
 #!/bin/bash
 set -e
 
+# default backend
+DEV="cuda"
+
+# parse --dev argument
+while [[ "$#" -gt 0 ]]; do
+    case $1 in
+        --dev) DEV="$2"; shift ;;
+        *) echo "Unknown parameter passed: $1"; exit 1 ;;
+    esac
+    shift
+done
+export DEV_BACKEND="$DEV"
+echo "Selected backend: $DEV_BACKEND"
+
 # clear build dirs
 rm -rf build
 rm -rf *.egg-info
@@ -13,6 +27,14 @@ rm -rf ~/.ktransformers
 echo "Installing python dependencies from requirements.txt"
 pip install -r requirements-local_chat.txt
 pip install -r ktransformers/server/requirements.txt
+
+# XPU-specific fix for triton
+if [[ "$DEV_BACKEND" == "xpu" ]]; then
+    echo "Replacing triton for XPU backend"
+    pip uninstall -y triton pytorch-triton-xpu || true
+    pip install pytorch-triton-xpu==3.3.0 --extra-index-url https://download.pytorch.org/whl/xpu
+fi
+
 echo "Installing ktransformers"
 KTRANSFORMERS_FORCE_BUILD=TRUE pip install -v . --no-build-isolation
 

From 055680e26c97b1e5353e9d989b5e087b78f38511 Mon Sep 17 00:00:00 2001
From: Alisehen <814073252@qq.com>
Date: Thu, 15 May 2025 07:03:45 +0000
Subject: [PATCH 2/5] add flashinfer to cuda device

---
 doc/en/balance-serve.md             | 2 --
 doc/en/install.md                   | 2 --
 doc/en/llama4.md                    | 2 --
 doc/zh/DeepseekR1_V3_tutorial_zh.md | 2 --
 install.sh                          | 4 ++++
 5 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/doc/en/balance-serve.md b/doc/en/balance-serve.md
index 4d72fbd..5217968 100644
--- a/doc/en/balance-serve.md
+++ b/doc/en/balance-serve.md
@@ -100,10 +100,8 @@ git submodule update --init --recursive
 
 # Install single NUMA dependencies
 USE_BALANCE_SERVE=1 bash ./install.sh
-pip install third_party/custom_flashinfer/
 # For those who have two cpu and 1T RAM(Dual NUMA):
 USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
-pip install third_party/custom_flashinfer/
 ```
 
 ## Running DeepSeek-R1-Q4KM Models
diff --git a/doc/en/install.md b/doc/en/install.md
index aee923b..031b541 100644
--- a/doc/en/install.md
+++ b/doc/en/install.md
@@ -117,13 +117,11 @@ Download source code and compile:
 
   ```shell
   USE_BALANCE_SERVE=1 bash ./install.sh
-  pip install third_party/custom_flashinfer/
   ```
 
 - For Multi-concurrency with two cpu and 1T RAM:
 
   ```shell
   USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
-  pip install third_party/custom_flashinfer/
   ```
 - For Windows (Windows native temporarily deprecated, please try WSL)
diff --git a/doc/en/llama4.md b/doc/en/llama4.md
index b55c32f..8592871 100644
--- a/doc/en/llama4.md
+++ b/doc/en/llama4.md
@@ -68,10 +68,8 @@ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.o
 ```bash
 # Install single NUMA dependencies
 USE_BALANCE_SERVE=1 bash ./install.sh
-pip install third_party/custom_flashinfer/
 # For those who have two cpu and 1T RAM(Dual NUMA):
 USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
-pip install third_party/custom_flashinfer/
 ```
 
 ### 4. Use our custom config.json
diff --git a/doc/zh/DeepseekR1_V3_tutorial_zh.md b/doc/zh/DeepseekR1_V3_tutorial_zh.md
index bbe109c..5645f4f 100644
--- a/doc/zh/DeepseekR1_V3_tutorial_zh.md
+++ b/doc/zh/DeepseekR1_V3_tutorial_zh.md
@@ -127,10 +127,8 @@ cd ktransformers
 git submodule update --init --recursive
 # 如果使用双 numa 版本
 USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
-pip install third_party/custom_flashinfer/
 # 如果使用单 numa 版本
 USE_BALANCE_SERVE=1 bash ./install.sh
-pip install third_party/custom_flashinfer/
 # 启动命令
 python ktransformers/server/main.py --model_path --gguf_path --cpu_infer 62 --optimize_config_path --port 10002 --chunk_size 256 --max_new_tokens 1024 --max_batch_size 4 --port 10002 --cache_lens 32768 --backend_type balance_serve
 ```
diff --git a/install.sh b/install.sh
index 260ae46..c19a18e 100644
--- a/install.sh
+++ b/install.sh
@@ -38,6 +38,10 @@ fi
 echo "Installing ktransformers"
 KTRANSFORMERS_FORCE_BUILD=TRUE pip install -v . --no-build-isolation
 
+if [[ "$DEV_BACKEND" == "cuda" ]]; then
+    echo "Installing custom_flashinfer for CUDA backend"
+    pip install third_party/custom_flashinfer/
+fi
 # SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
 # echo "Copying thirdparty libs to $SITE_PACKAGES"
 # cp -a csrc/balance_serve/build/third_party/prometheus-cpp/lib/libprometheus-cpp-*.so* $SITE_PACKAGES/

From edd9efa49ee53007740e6b88c209e7a60a1bb2f3 Mon Sep 17 00:00:00 2001
From: Alisehen <814073252@qq.com>
Date: Thu, 15 May 2025 07:28:50 +0000
Subject: [PATCH 3/5] fix

---
 install.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/install.sh b/install.sh
index c19a18e..c46c506 100644
--- a/install.sh
+++ b/install.sh
@@ -28,6 +28,9 @@ echo "Installing python dependencies from requirements.txt"
 pip install -r requirements-local_chat.txt
 pip install -r ktransformers/server/requirements.txt
 
+echo "Installing ktransformers"
+KTRANSFORMERS_FORCE_BUILD=TRUE pip install -v . --no-build-isolation
+
 # XPU-specific fix for triton
 if [[ "$DEV_BACKEND" == "xpu" ]]; then
     echo "Replacing triton for XPU backend"
@@ -35,9 +38,6 @@ if [[ "$DEV_BACKEND" == "xpu" ]]; then
     pip install pytorch-triton-xpu==3.3.0 --extra-index-url https://download.pytorch.org/whl/xpu
 fi
 
-echo "Installing ktransformers"
-KTRANSFORMERS_FORCE_BUILD=TRUE pip install -v . --no-build-isolation
-
 if [[ "$DEV_BACKEND" == "cuda" ]]; then
     echo "Installing custom_flashinfer for CUDA backend"
     pip install third_party/custom_flashinfer/

From f3b1e36b6a170758571ca68f6d84ac9f67de7ca8 Mon Sep 17 00:00:00 2001
From: Alisehen <814073252@qq.com>
Date: Thu, 15 May 2025 10:01:51 +0000
Subject: [PATCH 4/5] bug fix

---
 install.sh                  | 10 +---------
 requirements-local_chat.txt |  1 -
 setup.py                    | 10 ++++++++++
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/install.sh b/install.sh
index c46c506..573826e 100644
--- a/install.sh
+++ b/install.sh
@@ -31,13 +31,6 @@ pip install -r ktransformers/server/requirements.txt
 echo "Installing ktransformers"
 KTRANSFORMERS_FORCE_BUILD=TRUE pip install -v . --no-build-isolation
 
-# XPU-specific fix for triton
-if [[ "$DEV_BACKEND" == "xpu" ]]; then
-    echo "Replacing triton for XPU backend"
-    pip uninstall -y triton pytorch-triton-xpu || true
-    pip install pytorch-triton-xpu==3.3.0 --extra-index-url https://download.pytorch.org/whl/xpu
-fi
-
 if [[ "$DEV_BACKEND" == "cuda" ]]; then
     echo "Installing custom_flashinfer for CUDA backend"
     pip install third_party/custom_flashinfer/
@@ -47,5 +40,4 @@ fi
 # SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
 # echo "Copying thirdparty libs to $SITE_PACKAGES"
 # cp -a csrc/balance_serve/build/third_party/prometheus-cpp/lib/libprometheus-cpp-*.so* $SITE_PACKAGES/
 # patchelf --set-rpath '$ORIGIN' $SITE_PACKAGES/sched_ext.cpython*
 
-
-echo "Installation completed successfully"
+echo "Installation completed successfully"
\ No newline at end of file
diff --git a/requirements-local_chat.txt b/requirements-local_chat.txt
index dd3a206..25afaef 100644
--- a/requirements-local_chat.txt
+++ b/requirements-local_chat.txt
@@ -7,4 +7,3 @@ cpufeature; sys_platform == 'win32' or sys_platform == 'Windows'
 protobuf
 tiktoken
 blobfile
-triton>=3.2
diff --git a/setup.py b/setup.py
index 0961d93..c91d9dc 100644
--- a/setup.py
+++ b/setup.py
@@ -41,6 +41,15 @@ except ImportError:
     MUSA_HOME=None
 KTRANSFORMERS_BUILD_XPU = torch.xpu.is_available()
 
+# Detect the DEV_BACKEND environment variable
+dev_backend = os.environ.get("DEV_BACKEND", "").lower()
+if dev_backend == "xpu":
+    triton_dep = [
+        "pytorch-triton-xpu==3.3.0"
+    ]
+else:
+    triton_dep = ["triton>=3.2"]
+
 with_balance = os.environ.get("USE_BALANCE_SERVE", "0") == "1"
 
 class CpuInstructInfo:
@@ -659,6 +668,7 @@ else:
     setup(
         name=VersionInfo.PACKAGE_NAME,
         version=VersionInfo().get_package_version(),
+        install_requires=triton_dep,
         cmdclass={"bdist_wheel":BuildWheelsCommand ,"build_ext": CMakeBuild},
         ext_modules=ext_modules
     )

From 5b08d5b07b93580a24142079cfcbcb57712f96a8 Mon Sep 17 00:00:00 2001
From: Alisehen <814073252@qq.com>
Date: Sat, 17 May 2025 07:22:51 +0000
Subject: [PATCH 5/5] fix

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index fbf0924..5880323 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,6 @@ dependencies = [
     "build",
     "fire",
     "protobuf",
-    "triton >= 3.2"
 ]
 
 requires-python = ">=3.10"
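
A quick usage sketch of the install flow once the series is applied. This is an illustrative summary for reviewers, not part of the patches; the flags and behavior below are taken from the install.sh and setup.py diffs above.

```bash
# CUDA (default): DEV_BACKEND=cuda is exported, setup.py keeps the regular
# triton>=3.2 dependency, and custom_flashinfer is installed after ktransformers.
bash install.sh
bash install.sh --dev cuda   # equivalent to the default

# Intel XPU: DEV_BACKEND=xpu is exported, setup.py swaps triton for
# pytorch-triton-xpu==3.3.0, and the custom_flashinfer step is skipped.
bash install.sh --dev xpu

# The balance-serve / dual-NUMA variants from the docs still go through the
# same script and pick up the same backend handling.
USE_BALANCE_SERVE=1 bash ./install.sh
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh

# Any unrecognized flag aborts the script:
#   "Unknown parameter passed: <flag>"
```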