mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-11 15:54:37 +00:00
[fix] bugs about Qwen57B, install requirement, Dockerfile
This commit is contained in:
parent
c80490a95e
commit
49cce0c437
3 changed files with 3 additions and 8 deletions
|
@ -12,6 +12,7 @@ EOF
|
|||
|
||||
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server
|
||||
WORKDIR /workspace
|
||||
ENV CUDA_HOME /usr/local/cuda
|
||||
COPY --from=web_compile /home/ktransformers /workspace/ktransformers
|
||||
RUN <<EOF
|
||||
apt update -y && apt install -y --no-install-recommends \
|
||||
|
@ -27,7 +28,7 @@ git submodule init &&
|
|||
git submodule update &&
|
||||
pip install ninja pyproject numpy cpufeature &&
|
||||
pip install flash-attn &&
|
||||
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose &&
|
||||
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" pip install . --no-build-isolation --verbose &&
|
||||
pip cache purge
|
||||
EOF
|
||||
|
||||
|
|
|
@ -1,10 +1,3 @@
|
|||
- match:
|
||||
name: "^model\\.layers\\..*\\."
|
||||
replace:
|
||||
class: "default"
|
||||
kwargs:
|
||||
generate_device: "cuda"
|
||||
prefill_device: "cuda"
|
||||
- match:
|
||||
class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeRotaryEmbedding
|
||||
replace:
|
||||
|
|
|
@ -18,6 +18,7 @@ dependencies = [
|
|||
"torch >= 2.3.0",
|
||||
"transformers == 4.43.2",
|
||||
"fastapi >= 0.111.0",
|
||||
"uvicorn >= 0.30.1",
|
||||
"langchain >= 0.2.0",
|
||||
"blessed >= 1.20.0",
|
||||
"accelerate >= 0.31.0",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue