merge main; Add torch q8 linear

This commit is contained in:
Azure-Tang 2025-03-14 05:52:07 -04:00
parent 6c4ed59175
commit ed8437413b
27 changed files with 1561 additions and 114 deletions

View file

@@ -13,7 +13,7 @@
kwargs:
generate_device: "cuda"
prefill_device: "cuda"
-generate_op: "KLinearMarlin"
+generate_op: "KLinearQ8"
prefill_op: "KLinearTorch"
- match:
@@ -22,9 +22,9 @@
replace:
class: ktransformers.operators.linear.KTransformersLinear
kwargs:
-generate_device: "cuda"
+generate_device: "cpu"
prefill_device: "cuda"
-generate_op: "KLinearMarlin"
+generate_op: "KLinearTorch"
prefill_op: "KLinearTorch"
- match: