Merge pull request #657 from kvcache-ai/feat-absorb-for-long-prefill

Feat absorb for long prefill
Atream 2025-02-25 16:53:21 +08:00 committed by GitHub
commit b443c7dfa2
11 changed files with 193 additions and 43 deletions

@@ -159,7 +159,7 @@ class KExpertsCPU(KExpertsBase):
 down_ptr = ctypes.addressof(
     ctypes.cast(self.down.ctypes.data, ctypes.POINTER(ctypes.c_uint64)).contents
 )
-# print(self.gate_qtype, self.up_qtype, self.down_qtype)
+#print(self.gate_type, self.up_type, self.down_type)
 n_routed_experts = self.n_routed_experts
 # n_routed_experts = len(self.orig_module)
 moe_config = MOEConfig(
@@ -459,9 +459,9 @@ class KExpertsTorch(KExpertsBase):
 self.up[i] = w["up"][i, ...].to(device=device, dtype=self.dtype)
 self.down[i] = w["down"][i, ...].to(device=device, dtype=self.dtype)
-self.up = torch.cat(self.gate, dim=0)
+self.up = torch.cat(self.up, dim=0)
 self.gate = torch.cat(self.gate, dim=0)
-self.down = torch.cat(self.gate, dim=0)
+self.down = torch.cat(self.down, dim=0)
 return
 def unload(self):
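
Note: the KExpertsTorch hunk fixes a copy-paste bug in the weight-loading path, where the per-expert gate tensor list was concatenated into both self.up and self.down. Below is a minimal standalone sketch of the corrected concatenation; the tensor names and shapes are illustrative assumptions, not taken from the repository.

import torch

num_experts, hidden, intermediate = 4, 8, 16   # illustrative sizes
dtype, device = torch.float16, "cpu"

# Hypothetical per-expert weight lists, analogous to self.gate / self.up / self.down.
gate = [torch.randn(intermediate, hidden, dtype=dtype, device=device) for _ in range(num_experts)]
up   = [torch.randn(intermediate, hidden, dtype=dtype, device=device) for _ in range(num_experts)]
down = [torch.randn(hidden, intermediate, dtype=dtype, device=device) for _ in range(num_experts)]

# The buggy version concatenated `gate` into all three outputs; the fix concatenates
# each list with its own tensors, stacking experts along dim 0:
gate_w = torch.cat(gate, dim=0)   # (num_experts * intermediate, hidden)
up_w   = torch.cat(up,   dim=0)   # (num_experts * intermediate, hidden)
down_w = torch.cat(down, dim=0)   # (num_experts * hidden, intermediate)

assert up_w.shape == (num_experts * intermediate, hidden)
assert down_w.shape == (num_experts * hidden, intermediate)

With the old code, the merged up and down tensors would silently take gate's values (and, for down, gate's shape), which is what this hunk corrects.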