From c3d0ac80c6b1b9c3bf964a45e3ec8669f7e498a7 Mon Sep 17 00:00:00 2001 From: qiyuxinlin <1668068727@qq.com> Date: Wed, 14 May 2025 13:13:10 +0000 Subject: [PATCH] update readme --- doc/en/balance-serve.md | 2 ++ doc/en/install.md | 2 ++ doc/en/llama4.md | 4 +++- doc/zh/DeepseekR1_V3_tutorial_zh.md | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/en/balance-serve.md b/doc/en/balance-serve.md index 5217968..4d72fbd 100644 --- a/doc/en/balance-serve.md +++ b/doc/en/balance-serve.md @@ -100,8 +100,10 @@ git submodule update --init --recursive # Install single NUMA dependencies USE_BALANCE_SERVE=1 bash ./install.sh +pip install third_party/custom_flashinfer/ # For those who have two cpu and 1T RAM(Dual NUMA): USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh +pip install third_party/custom_flashinfer/ ``` ## Running DeepSeek-R1-Q4KM Models diff --git a/doc/en/install.md b/doc/en/install.md index 031b541..aee923b 100644 --- a/doc/en/install.md +++ b/doc/en/install.md @@ -117,11 +117,13 @@ Download source code and compile: ```shell USE_BALANCE_SERVE=1 bash ./install.sh + pip install third_party/custom_flashinfer/ ``` - For Multi-concurrency with two cpu and 1T RAM: ```shell USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh + pip install third_party/custom_flashinfer/ ``` - For Windows (Windows native temporarily deprecated, please try WSL) diff --git a/doc/en/llama4.md b/doc/en/llama4.md index 8592871..0daeb9c 100644 --- a/doc/en/llama4.md +++ b/doc/en/llama4.md @@ -67,9 +67,11 @@ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.o ```bash # Install single NUMA dependencies -USE_BALANCE_SERVE=1 bash ./install.sh +USE_BALANCE_SERVE=1 bash ./install.sh +pip install third_party/custom_flashinfer/ # For those who have two cpu and 1T RAM(Dual NUMA): USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh +pip install third_party/custom_flashinfer/ ``` ### 4. 
Use our custom config.json diff --git a/doc/zh/DeepseekR1_V3_tutorial_zh.md b/doc/zh/DeepseekR1_V3_tutorial_zh.md index 5645f4f..bbe109c 100644 --- a/doc/zh/DeepseekR1_V3_tutorial_zh.md +++ b/doc/zh/DeepseekR1_V3_tutorial_zh.md @@ -127,8 +127,10 @@ cd ktransformers git submodule update --init --recursive # 如果使用双 numa 版本 USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh +pip install third_party/custom_flashinfer/ # 如果使用单 numa 版本 USE_BALANCE_SERVE=1 bash ./install.sh +pip install third_party/custom_flashinfer/ # 启动命令 python ktransformers/server/main.py --model_path --gguf_path --cpu_infer 62 --optimize_config_path --port 10002 --chunk_size 256 --max_new_tokens 1024 --max_batch_size 4 --port 10002 --cache_lens 32768 --backend_type balance_serve ```