Update modeling_deepseek_v3.py

This commit is contained in:
Atream 2025-04-03 17:13:06 +08:00 committed by GitHub
parent 016d11e6d4
commit e36ddc36a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -436,7 +436,7 @@ class MoEGate(nn.Module):
### select top-k experts ### select top-k experts
if self.topk_method == "noaux_tc": if self.topk_method == "noaux_tc":
assert not self.training #assert not self.training
scores_for_choice = scores.view(bsz * seq_len, -1) + self.e_score_correction_bias.unsqueeze(0) scores_for_choice = scores.view(bsz * seq_len, -1) + self.e_score_correction_bias.unsqueeze(0)
group_scores = ( group_scores = (
scores_for_choice.view(bsz * seq_len, self.n_group, -1).topk(2, dim=-1)[0].sum(dim = -1) scores_for_choice.view(bsz * seq_len, self.n_group, -1).topk(2, dim=-1)[0].sum(dim = -1)