mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-15 17:49:42 +00:00
fix-update-flashinfer_wrapper_local_chat
This commit is contained in:
parent
5474be5299
commit
477ac28a9c
4 changed files with 15 additions and 4 deletions
|
@ -293,6 +293,7 @@
|
|||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
absorb_for_prefill: False
|
||||
|
||||
# GPU 1: layers 15–29
|
||||
- match:
|
||||
|
@ -302,6 +303,7 @@
|
|||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
absorb_for_prefill: False
|
||||
|
||||
# GPU 2: layers 30–44
|
||||
- match:
|
||||
|
@ -311,6 +313,7 @@
|
|||
kwargs:
|
||||
generate_device: "cuda:2"
|
||||
prefill_device: "cuda:2"
|
||||
absorb_for_prefill: False
|
||||
|
||||
# GPU 3: layers 45–60
|
||||
- match:
|
||||
|
@ -320,6 +323,7 @@
|
|||
kwargs:
|
||||
generate_device: "cuda:3"
|
||||
prefill_device: "cuda:3"
|
||||
absorb_for_prefill: False
|
||||
|
||||
# === Overall Model Replacement with Transfer Map ===
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue