Merge pull request #657 from kvcache-ai/feat-absorb-for-long-prefill

Feat absorb for long prefill
This commit is contained in:
Atream 2025-02-25 16:53:21 +08:00 committed by GitHub
commit b443c7dfa2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 193 additions and 43 deletions

View file

@@ -330,6 +330,8 @@ class GGUFLoader:
values = GGML_DEQUANTIZE[ggml_name](data)
values = torch.from_numpy(values.copy())
if ggml_name == "BF16":
values = values.view(torch.bfloat16)
values = values.view(shape[-2::-1])
return values