Merge pull request #657 from kvcache-ai/feat-absorb-for-long-prefill

Feat absorb for long prefill
2025-09-09 13:55:27 +00:00 · 2025-02-25 16:53:21 +08:00 · 2025-02-25 16:53:21 +08:00 · b443c7dfa2
commit b443c7dfa2
parent 050b745a6e f4c198bd42
11 changed files with 193 additions and 43 deletions
--- a/ktransformers/operators/experts.py
+++ b/ktransformers/operators/experts.py
@@ -159,7 +159,7 @@ class KExpertsCPU(KExpertsBase):
        down_ptr = ctypes.addressof(
            ctypes.cast(self.down.ctypes.data, ctypes.POINTER(ctypes.c_uint64)).contents
        )
-        # print(self.gate_qtype, self.up_qtype, self.down_qtype)
+        #print(self.gate_type, self.up_type, self.down_type)
        n_routed_experts = self.n_routed_experts
        # n_routed_experts = len(self.orig_module)
        moe_config = MOEConfig(
@@ -459,9 +459,9 @@ class KExpertsTorch(KExpertsBase):
                    self.up[i] = w["up"][i, ...].to(device=device, dtype=self.dtype)
                    self.down[i] = w["down"][i, ...].to(device=device, dtype=self.dtype)
        
-        self.up = torch.cat(self.gate, dim=0)
+        self.up = torch.cat(self.up, dim=0)
        self.gate = torch.cat(self.gate, dim=0)
-        self.down = torch.cat(self.gate, dim=0)
+        self.down = torch.cat(self.down, dim=0)
        return 

    def unload(self):