update rope calculation; update modeling.py; update gate for moe

This commit is contained in:
Azure 2025-02-01 07:32:21 +00:00
parent 5a50b34627
commit f873558a89
11 changed files with 402 additions and 412 deletions

View file

@ -135,3 +135,7 @@ class StaticCache(transformers.StaticCache):
# In-place ops prevent breaking the static address
self.key_cache[layer_idx].zero_()
self.value_cache[layer_idx].zero_()
def get_max_cache_shape(self) -> Tuple[int, int, int, int]:
"""Returns the maximum shape of the cache."""
return self.max_cache_len