mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-06 12:40:02 +00:00
Merge pull request #1307 from kvcache-ai/hyc
add xpu parameters to install.sh
This commit is contained in:
commit
32f3d7befb
9 changed files with 31 additions and 15 deletions
|
@ -100,10 +100,8 @@ git submodule update --init --recursive
|
||||||
|
|
||||||
# Install single NUMA dependencies
|
# Install single NUMA dependencies
|
||||||
USE_BALANCE_SERVE=1 bash ./install.sh
|
USE_BALANCE_SERVE=1 bash ./install.sh
|
||||||
pip install third_party/custom_flashinfer/
|
|
||||||
# For those who have two cpu and 1T RAM(Dual NUMA):
|
# For those who have two cpu and 1T RAM(Dual NUMA):
|
||||||
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
|
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
|
||||||
pip install third_party/custom_flashinfer/
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Running DeepSeek-R1-Q4KM Models
|
## Running DeepSeek-R1-Q4KM Models
|
||||||
|
|
|
@ -117,13 +117,11 @@ Download source code and compile:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
USE_BALANCE_SERVE=1 bash ./install.sh
|
USE_BALANCE_SERVE=1 bash ./install.sh
|
||||||
pip install third_party/custom_flashinfer/
|
|
||||||
```
|
```
|
||||||
- For Multi-concurrency with two cpu and 1T RAM:
|
- For Multi-concurrency with two cpu and 1T RAM:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
|
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
|
||||||
pip install third_party/custom_flashinfer/
|
|
||||||
```
|
```
|
||||||
- For Windows (Windows native temporarily deprecated, please try WSL)
|
- For Windows (Windows native temporarily deprecated, please try WSL)
|
||||||
|
|
||||||
|
|
|
@ -68,10 +68,8 @@ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.o
|
||||||
```bash
|
```bash
|
||||||
# Install single NUMA dependencies
|
# Install single NUMA dependencies
|
||||||
USE_BALANCE_SERVE=1 bash ./install.sh
|
USE_BALANCE_SERVE=1 bash ./install.sh
|
||||||
pip install third_party/custom_flashinfer/
|
|
||||||
# For those who have two cpu and 1T RAM(Dual NUMA):
|
# For those who have two cpu and 1T RAM(Dual NUMA):
|
||||||
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
|
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
|
||||||
pip install third_party/custom_flashinfer/
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### 4. Use our custom config.json
|
### 4. Use our custom config.json
|
||||||
|
|
|
@ -62,9 +62,7 @@ cd ktransformers
|
||||||
git submodule update --init
|
git submodule update --init
|
||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
bash install.sh
|
bash install.sh --dev xpu
|
||||||
pip uninstall triton pytorch-triton-xpu
|
|
||||||
pip install pytorch-triton-xpu==3.3.0 --extra-index-url https://download.pytorch.org/whl/xpu # to avoid potential triton import error
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Running DeepSeek-R1 Models
|
## Running DeepSeek-R1 Models
|
||||||
|
|
|
@ -127,10 +127,8 @@ cd ktransformers
|
||||||
git submodule update --init --recursive
|
git submodule update --init --recursive
|
||||||
# 如果使用双 numa 版本
|
# 如果使用双 numa 版本
|
||||||
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
|
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
|
||||||
pip install third_party/custom_flashinfer/
|
|
||||||
# 如果使用单 numa 版本
|
# 如果使用单 numa 版本
|
||||||
USE_BALANCE_SERVE=1 bash ./install.sh
|
USE_BALANCE_SERVE=1 bash ./install.sh
|
||||||
pip install third_party/custom_flashinfer/
|
|
||||||
# 启动命令
|
# 启动命令
|
||||||
python ktransformers/server/main.py --model_path <your model path> --gguf_path <your gguf path> --cpu_infer 62 --optimize_config_path <inject rule path> --port 10002 --chunk_size 256 --max_new_tokens 1024 --max_batch_size 4 --port 10002 --cache_lens 32768 --backend_type balance_serve
|
python ktransformers/server/main.py --model_path <your model path> --gguf_path <your gguf path> --cpu_infer 62 --optimize_config_path <inject rule path> --port 10002 --chunk_size 256 --max_new_tokens 1024 --max_batch_size 4 --port 10002 --cache_lens 32768 --backend_type balance_serve
|
||||||
```
|
```
|
||||||
|
|
20
install.sh
20
install.sh
|
@ -1,6 +1,20 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
# default backend
|
||||||
|
DEV="cuda"
|
||||||
|
|
||||||
|
# parse --dev argument
|
||||||
|
while [[ "$#" -gt 0 ]]; do
|
||||||
|
case $1 in
|
||||||
|
--dev) DEV="$2"; shift ;;
|
||||||
|
*) echo "Unknown parameter passed: $1"; exit 1 ;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
export DEV_BACKEND="$DEV"
|
||||||
|
echo "Selected backend: $DEV_BACKEND"
|
||||||
|
|
||||||
# clear build dirs
|
# clear build dirs
|
||||||
rm -rf build
|
rm -rf build
|
||||||
rm -rf *.egg-info
|
rm -rf *.egg-info
|
||||||
|
@ -13,13 +27,17 @@ rm -rf ~/.ktransformers
|
||||||
echo "Installing python dependencies from requirements.txt"
|
echo "Installing python dependencies from requirements.txt"
|
||||||
pip install -r requirements-local_chat.txt
|
pip install -r requirements-local_chat.txt
|
||||||
pip install -r ktransformers/server/requirements.txt
|
pip install -r ktransformers/server/requirements.txt
|
||||||
|
|
||||||
echo "Installing ktransformers"
|
echo "Installing ktransformers"
|
||||||
KTRANSFORMERS_FORCE_BUILD=TRUE pip install -v . --no-build-isolation
|
KTRANSFORMERS_FORCE_BUILD=TRUE pip install -v . --no-build-isolation
|
||||||
|
|
||||||
|
if [[ "$DEV_BACKEND" == "cuda" ]]; then
|
||||||
|
echo "Installing custom_flashinfer for CUDA backend"
|
||||||
|
pip install third_party/custom_flashinfer/
|
||||||
|
fi
|
||||||
# SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
|
# SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
|
||||||
# echo "Copying thirdparty libs to $SITE_PACKAGES"
|
# echo "Copying thirdparty libs to $SITE_PACKAGES"
|
||||||
# cp -a csrc/balance_serve/build/third_party/prometheus-cpp/lib/libprometheus-cpp-*.so* $SITE_PACKAGES/
|
# cp -a csrc/balance_serve/build/third_party/prometheus-cpp/lib/libprometheus-cpp-*.so* $SITE_PACKAGES/
|
||||||
# patchelf --set-rpath '$ORIGIN' $SITE_PACKAGES/sched_ext.cpython*
|
# patchelf --set-rpath '$ORIGIN' $SITE_PACKAGES/sched_ext.cpython*
|
||||||
|
|
||||||
|
|
||||||
echo "Installation completed successfully"
|
echo "Installation completed successfully"
|
|
@ -30,7 +30,6 @@ dependencies = [
|
||||||
"build",
|
"build",
|
||||||
"fire",
|
"fire",
|
||||||
"protobuf",
|
"protobuf",
|
||||||
"triton >= 3.2"
|
|
||||||
]
|
]
|
||||||
|
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
|
|
|
@ -7,4 +7,3 @@ cpufeature; sys_platform == 'win32' or sys_platform == 'Windows'
|
||||||
protobuf
|
protobuf
|
||||||
tiktoken
|
tiktoken
|
||||||
blobfile
|
blobfile
|
||||||
triton>=3.2
|
|
||||||
|
|
10
setup.py
10
setup.py
|
@ -41,6 +41,15 @@ except ImportError:
|
||||||
MUSA_HOME=None
|
MUSA_HOME=None
|
||||||
KTRANSFORMERS_BUILD_XPU = torch.xpu.is_available()
|
KTRANSFORMERS_BUILD_XPU = torch.xpu.is_available()
|
||||||
|
|
||||||
|
# 检测 DEV_BACKEND 环境变量
|
||||||
|
dev_backend = os.environ.get("DEV_BACKEND", "").lower()
|
||||||
|
if dev_backend == "xpu":
|
||||||
|
triton_dep = [
|
||||||
|
"pytorch-triton-xpu==3.3.0"
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
triton_dep = ["triton>=3.2"]
|
||||||
|
|
||||||
with_balance = os.environ.get("USE_BALANCE_SERVE", "0") == "1"
|
with_balance = os.environ.get("USE_BALANCE_SERVE", "0") == "1"
|
||||||
|
|
||||||
class CpuInstructInfo:
|
class CpuInstructInfo:
|
||||||
|
@ -659,6 +668,7 @@ else:
|
||||||
setup(
|
setup(
|
||||||
name=VersionInfo.PACKAGE_NAME,
|
name=VersionInfo.PACKAGE_NAME,
|
||||||
version=VersionInfo().get_package_version(),
|
version=VersionInfo().get_package_version(),
|
||||||
|
install_requires=triton_dep,
|
||||||
cmdclass={"bdist_wheel":BuildWheelsCommand ,"build_ext": CMakeBuild},
|
cmdclass={"bdist_wheel":BuildWheelsCommand ,"build_ext": CMakeBuild},
|
||||||
ext_modules=ext_modules
|
ext_modules=ext_modules
|
||||||
)
|
)
|
||||||
|
|
Loading…
Add table
Reference in a new issue