mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 15:29:39 +00:00
Support Windows. Support q4_0 and q5_0 dequantization on CPU. Add the copyright notice from pygguf (it was added before, but disappeared after a merge). Add some TODOs in the code.
This commit is contained in:
parent
442e13bc97
commit
0a2fd52cea
32 changed files with 248 additions and 108 deletions
|
@ -46,6 +46,7 @@ class StaticCache(transformers.StaticCache):
|
|||
self.value_cache: List[torch.Tensor] = []
|
||||
cache_shape = (max_batch_size, self.num_key_value_heads, self.max_cache_len, self.head_dim)
|
||||
if config.architectures[0] == "DeepseekV2ForCausalLM":
|
||||
# TODO: for deepseek, cache_shape is different whether using Absorbed MLA, check it automatically
|
||||
# key_shape = (max_batch_size, self.num_key_value_heads, self.max_cache_len, config.qk_rope_head_dim + config.qk_nope_head_dim)
|
||||
# value_shape = (max_batch_size, self.num_key_value_heads, self.max_cache_len, config.v_head_dim)
|
||||
key_shape = (max_batch_size, 1, self.max_cache_len, config.qk_rope_head_dim)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue