support windows support q4_0 and q5_0 dequant on cpu Add CopyRight from pygguf(It was added before, but disappear after merge). Add some TODO in the code.

This commit is contained in:
Atream 2024-08-07 12:19:06 +08:00
parent 442e13bc97
commit 0a2fd52cea
32 changed files with 248 additions and 108 deletions

View file

@ -46,6 +46,7 @@ class StaticCache(transformers.StaticCache):
self.value_cache: List[torch.Tensor] = []
cache_shape = (max_batch_size, self.num_key_value_heads, self.max_cache_len, self.head_dim)
if config.architectures[0] == "DeepseekV2ForCausalLM":
# TODO: for deepseek, cache_shape is different whether using Absorbed MLA, check it automatically
# key_shape = (max_batch_size, self.num_key_value_heads, self.max_cache_len, config.qk_rope_head_dim + config.qk_nope_head_dim)
# value_shape = (max_batch_size, self.num_key_value_heads, self.max_cache_len, config.v_head_dim)
key_shape = (max_batch_size, 1, self.max_cache_len, config.qk_rope_head_dim)