From f545f4df756e6ec981e5e346bcc7e7a0ee2cb643 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Fri, 22 Aug 2025 17:36:29 +0800 Subject: [PATCH] with forced wmma for cu11 --- ggml/src/ggml-cuda/fattn.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index c435bf5a7..3440ed287 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -396,9 +396,9 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const } //kcpp: use wmma to fix cu11 incoherence - // if (fp16_mma_available(cc) && (ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING || cc == GGML_CUDA_CC_TURING)) { - // return BEST_FATTN_KERNEL_WMMA_F16; - // } + if (fp16_mma_available(cc) && (ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING || cc == GGML_CUDA_CC_TURING)) { + return BEST_FATTN_KERNEL_WMMA_F16; + } return BEST_FATTN_KERNEL_MMA_F16; }