mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 11:49:51 +00:00
update yaml example; update version idx; update docker file
This commit is contained in:
parent
6735beb5b6
commit
8747c099f2
4 changed files with 34 additions and 34 deletions
|
|
@@ -24,7 +24,7 @@
|
|||
prefill_device: "cuda:1"
|
||||
|
||||
- match:
|
||||
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.(?!self_attn).*$" # regular expression
|
||||
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
|
||||
class: torch.nn.Linear # only match modules matching name and class simultaneously
|
||||
replace:
|
||||
class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
|
||||
|
|
@@ -35,7 +35,7 @@
|
|||
prefill_op: "KLinearTorch"
|
||||
|
||||
- match:
|
||||
name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn).*$" # regular expression
|
||||
name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
|
||||
class: torch.nn.Linear # only match modules matching name and class simultaneously
|
||||
replace:
|
||||
class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue