[fix] bugs about Qwen57B, install requirement, Dockerfile

This commit is contained in:
chenxl 2024-08-30 03:24:26 +00:00
parent c80490a95e
commit 49cce0c437
3 changed files with 3 additions and 8 deletions

View file

@ -12,6 +12,7 @@ EOF
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server
WORKDIR /workspace
ENV CUDA_HOME /usr/local/cuda
COPY --from=web_compile /home/ktransformers /workspace/ktransformers
RUN <<EOF
apt update -y && apt install -y --no-install-recommends \
@ -27,7 +28,7 @@ git submodule init &&
git submodule update &&
pip install ninja pyproject numpy cpufeature &&
pip install flash-attn &&
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose &&
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" pip install . --no-build-isolation --verbose &&
pip cache purge
EOF

View file

@ -1,10 +1,3 @@
- match:
name: "^model\\.layers\\..*\\."
replace:
class: "default"
kwargs:
generate_device: "cuda"
prefill_device: "cuda"
- match:
class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeRotaryEmbedding
replace:

View file

@ -18,6 +18,7 @@ dependencies = [
"torch >= 2.3.0",
"transformers == 4.43.2",
"fastapi >= 0.111.0",
"uvicorn >= 0.30.1",
"langchain >= 0.2.0",
"blessed >= 1.20.0",
"accelerate >= 0.31.0",