fix-update-flashinfer_wrapper_local_chat

This commit is contained in:
Atream 2025-02-25 12:47:31 +00:00
parent 5474be5299
commit 477ac28a9c
4 changed files with 15 additions and 4 deletions

View file

@ -293,6 +293,7 @@
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"
absorb_for_prefill: False
# GPU 1: layers 1529
- match:
@ -302,6 +303,7 @@
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"
absorb_for_prefill: False
# GPU 2: layers 3044
- match:
@ -311,6 +313,7 @@
kwargs:
generate_device: "cuda:2"
prefill_device: "cuda:2"
absorb_for_prefill: False
# GPU 3: layers 4560
- match:
@ -320,6 +323,7 @@
kwargs:
generate_device: "cuda:3"
prefill_device: "cuda:3"
absorb_for_prefill: False
# === Overall Model Replacement with Transfer Map ===