mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 23:34:35 +00:00
Implement multi-batch support for v2, v3, and r1 models with backend_type configured as ktransformers.
This commit is contained in:
parent
890b0f1622
commit
a6ab9e349c
6 changed files with 383 additions and 52 deletions
|
@@ -670,6 +670,7 @@ class KLinearMarlin(KLinearBase):
|
|||
padding_input[:,:self.orin_in_features] = x
|
||||
x = padding_input
|
||||
marlin_s = self.marlin_s.to(x.dtype)
|
||||
x = x.contiguous()
|
||||
x = KTransformersOps.gptq_marlin_gemm(
|
||||
x,
|
||||
self.marlin_q_w,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue