diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index debcb6e54..d650b5fbd 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -1153,8 +1153,8 @@ void launch_fattn( GGML_ASSERT(block_dim.x % warp_size == 0); - const ggml_cuda_kernel_launch_params launch_params = ggml_cuda_kernel_launch_params(blocks_num, block_dim, nbytes_shared, main_stream); - ggml_cuda_kernel_launch(fattn_kernel, launch_params, + // disabled PDL enrollment for now due to a compiler bug. + fattn_kernel<<<blocks_num, block_dim, nbytes_shared, main_stream>>>( (const char *) Q->data, K_data, V_data,