mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-05 20:19:51 +00:00
update marlin expert example
This commit is contained in:
parent
a2fc2a8658
commit
0564ac8465
1 changed files with 20 additions and 2 deletions
|
@ -79,6 +79,24 @@
|
|||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
|
||||
- match:
|
||||
name: "^model\\.layers\\.(0|[1-4])\\.mlp\\.experts$" # inject experts in layer 0~4 as marlin expert
|
||||
replace:
|
||||
class: ktransformers.operators.experts.KTransformersExperts
|
||||
kwargs:
|
||||
generate_device: "cuda:0" # run in cuda:0
|
||||
generate_op: "KExpertsMarlin"
|
||||
recursive: False
|
||||
|
||||
- match:
|
||||
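name: "^model\\.layers\\.([3][0])\\.mlp\\.experts$" # inject experts in layer 30 as marlin expert (this pattern matches only layer 30; widen the second character class, e.g. [0-1], to cover more layers)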
|
||||
replace:
|
||||
class: ktransformers.operators.experts.KTransformersExperts
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
generate_op: "KExpertsMarlin"
|
||||
recursive: False
|
||||
|
||||
- match:
|
||||
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$"
|
||||
replace:
|
||||
|
@ -139,5 +157,5 @@
|
|||
replace:
|
||||
class: "default"
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
|
Loading…
Add table
Reference in a new issue