Implement multi-batch support for v2, v3, and r1 models with backend_type configured as ktransformers.

2025-09-10 23:34:35 +00:00 · 2025-07-09 09:09:47 +00:00 · 2025-07-09 09:09:47 +00:00 · a6ab9e349c
commit a6ab9e349c
parent 890b0f1622
6 changed files with 383 additions and 52 deletions
--- a/ktransformers/operators/linear.py
+++ b/ktransformers/operators/linear.py
@@ -670,6 +670,7 @@ class KLinearMarlin(KLinearBase):
            padding_input[:,:self.orin_in_features] = x
            x = padding_input
        marlin_s = self.marlin_s.to(x.dtype)
+        x = x.contiguous()
        x = KTransformersOps.gptq_marlin_gemm(
            x,
            self.marlin_q_w,