mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-11 07:44:35 +00:00
[fix] bugs about Qwen57B, install requirement, Dockerfile
This commit is contained in:
parent
c80490a95e
commit
49cce0c437
3 changed files with 3 additions and 8 deletions
|
@ -12,6 +12,7 @@ EOF
|
||||||
|
|
||||||
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server
|
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
ENV CUDA_HOME /usr/local/cuda
|
||||||
COPY --from=web_compile /home/ktransformers /workspace/ktransformers
|
COPY --from=web_compile /home/ktransformers /workspace/ktransformers
|
||||||
RUN <<EOF
|
RUN <<EOF
|
||||||
apt update -y && apt install -y --no-install-recommends \
|
apt update -y && apt install -y --no-install-recommends \
|
||||||
|
@ -27,7 +28,7 @@ git submodule init &&
|
||||||
git submodule update &&
|
git submodule update &&
|
||||||
pip install ninja pyproject numpy cpufeature &&
|
pip install ninja pyproject numpy cpufeature &&
|
||||||
pip install flash-attn &&
|
pip install flash-attn &&
|
||||||
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose &&
|
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" pip install . --no-build-isolation --verbose &&
|
||||||
pip cache purge
|
pip cache purge
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,3 @@
|
||||||
- match:
|
|
||||||
name: "^model\\.layers\\..*\\."
|
|
||||||
replace:
|
|
||||||
class: "default"
|
|
||||||
kwargs:
|
|
||||||
generate_device: "cuda"
|
|
||||||
prefill_device: "cuda"
|
|
||||||
- match:
|
- match:
|
||||||
class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeRotaryEmbedding
|
class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeRotaryEmbedding
|
||||||
replace:
|
replace:
|
||||||
|
|
|
@ -18,6 +18,7 @@ dependencies = [
|
||||||
"torch >= 2.3.0",
|
"torch >= 2.3.0",
|
||||||
"transformers == 4.43.2",
|
"transformers == 4.43.2",
|
||||||
"fastapi >= 0.111.0",
|
"fastapi >= 0.111.0",
|
||||||
|
"uvicorn >= 0.30.1",
|
||||||
"langchain >= 0.2.0",
|
"langchain >= 0.2.0",
|
||||||
"blessed >= 1.20.0",
|
"blessed >= 1.20.0",
|
||||||
"accelerate >= 0.31.0",
|
"accelerate >= 0.31.0",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue