mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-16 19:59:16 +00:00
fix bad merge
This commit is contained in:
parent
35f524d3e2
commit
74c6daba1a
1 changed file with 6 additions and 0 deletions
|
|
@@ -479,6 +479,12 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
|
|||
return BEST_FATTN_KERNEL_MMA_F16;
|
||||
}
|
||||
|
||||
const int ncols2_max = Q->ne[0] == 320 ? 32 : ((Q->ne[0] == 576 || Q->ne[0] == 192) ? 16 : 8);
|
||||
int gqa_ratio_eff = 1;
|
||||
while (gqa_ratio % (2*gqa_ratio_eff) == 0 && gqa_ratio_eff < ncols2_max) {
|
||||
gqa_ratio_eff *= 2;
|
||||
}
|
||||
|
||||
// Use the WMMA kernel if possible but only for HIP
|
||||
#if defined(GGML_HIP_ROCWMMA_FATTN)
|
||||
if (ggml_cuda_should_use_wmma_fattn(cc) && K->ne[1] % FATTN_KQ_STRIDE == 0 && Q->ne[0] != 40 && Q->ne[0] != 72 && Q->ne[0] != 192 && Q->ne[0] != 512 && Q->ne[0] != 576) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue