mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 08:00:25 +00:00
try to fix some fattn inconsistencies
This commit is contained in:
parent
c12f9e3b7c
commit
d7c2f27749
1 changed file with 5 additions and 2 deletions
|
|
@@ -306,7 +306,10 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
|
|||
|
||||
//kcpp: use wmma to fix cu11 incoherence
|
||||
if (ggml_cuda_should_use_wmma_fattn(cc) && (ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING || cc == GGML_CUDA_CC_TURING)) {
|
||||
return BEST_FATTN_KERNEL_WMMA_F16;
|
||||
if(Q->ne[0] != 40 && Q->ne[0] != 72 && Q->ne[0] != 576) //kcpp: these sizes not supported in wmma
|
||||
{
|
||||
return BEST_FATTN_KERNEL_WMMA_F16;
|
||||
}
|
||||
}
|
||||
|
||||
return BEST_FATTN_KERNEL_MMA_F16;
|
||||
|
|
@@ -330,7 +333,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
|
|||
}
|
||||
}
|
||||
//kcpp: patch from previous version for my sanity. it worked before, idk it should work now.
|
||||
if (Q->ne[1] <= 8 || Q->ne[0] == 256) {
|
||||
if ((Q->ne[1] <= 8 || Q->ne[0] == 256) && can_use_vector_kernel) {
|
||||
return BEST_FATTN_KERNEL_VEC;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue