[fix] bugs in Qwen57B rules, install requirements, and Dockerfile

2025-09-11 15:54:37 +00:00 · 2024-08-30 03:24:26 +00:00 · 2024-08-30 03:24:26 +00:00 · 49cce0c437
commit 49cce0c437
parent c80490a95e
3 changed files with 3 additions and 8 deletions
--- a/3
+++ b/3
@@ -12,6 +12,7 @@ EOF

 FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server
 WORKDIR /workspace
+ENV CUDA_HOME=/usr/local/cuda
 COPY --from=web_compile /home/ktransformers /workspace/ktransformers
 RUN <<EOF
 apt update -y &&  apt install -y  --no-install-recommends \
@@ -27,7 +28,7 @@ git submodule init &&
 git submodule update &&
 pip install ninja pyproject numpy cpufeature &&
 pip install flash-attn &&
-CPU_INSTRUCT=NATIVE  KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose &&
+CPU_INSTRUCT=NATIVE  KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" pip install . --no-build-isolation --verbose &&
 pip cache purge
 EOF

--- a/ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct.yaml
+++ b/ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct.yaml
@@ -1,10 +1,3 @@
- match:
-    name: "^model\\.layers\\..*\\."
-  replace:
-    class: "default"
-    kwargs:
-      generate_device: "cuda"
-      prefill_device: "cuda"
 - match:
    class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeRotaryEmbedding
  replace:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ dependencies = [
  "torch >= 2.3.0",
  "transformers == 4.43.2",
  "fastapi >= 0.111.0",
+  "uvicorn >= 0.30.1",
  "langchain >= 0.2.0",
  "blessed >= 1.20.0",
  "accelerate >= 0.31.0",