mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-06 04:30:03 +00:00
update marlin expert example
This commit is contained in:
parent
a2fc2a8658
commit
0564ac8465
1 changed files with 20 additions and 2 deletions
|
@ -79,6 +79,24 @@
|
||||||
generate_device: "cuda:1"
|
generate_device: "cuda:1"
|
||||||
prefill_device: "cuda:1"
|
prefill_device: "cuda:1"
|
||||||
|
|
||||||
|
- match:
|
||||||
|
name: "^model\\.layers\\.(0|[1-4])\\.mlp\\.experts$" # inject experts in layer 0~4 as marlin expert
|
||||||
|
replace:
|
||||||
|
class: ktransformers.operators.experts.KTransformersExperts
|
||||||
|
kwargs:
|
||||||
|
generate_device: "cuda:0" # run in cuda:0
|
||||||
|
generate_op: "KExpertsMarlin"
|
||||||
|
recursive: False
|
||||||
|
|
||||||
|
- match:
|
||||||
|
name: "^model\\.layers\\.([3][0])\\.mlp\\.experts$" # inject experts in layer 30 only as marlin expert (pattern [3][0] matches exactly "30"; widen to 3[01] to also cover layer 31)
|
||||||
|
replace:
|
||||||
|
class: ktransformers.operators.experts.KTransformersExperts
|
||||||
|
kwargs:
|
||||||
|
generate_device: "cuda:1"
|
||||||
|
generate_op: "KExpertsMarlin"
|
||||||
|
recursive: False
|
||||||
|
|
||||||
- match:
|
- match:
|
||||||
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$"
|
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$"
|
||||||
replace:
|
replace:
|
||||||
|
@ -139,5 +157,5 @@
|
||||||
replace:
|
replace:
|
||||||
class: "default"
|
class: "default"
|
||||||
kwargs:
|
kwargs:
|
||||||
generate_device: "cuda:1"
|
generate_device: "cuda:0"
|
||||||
prefill_device: "cuda:1"
|
prefill_device: "cuda:0"
|
||||||
|
|
Loading…
Add table
Reference in a new issue