diff --git a/CMakeLists.txt b/CMakeLists.txt
index 97d36eb7a..599fee389 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,6 +96,10 @@ if (LLAMA_CUBLAS)
     if (CUDAToolkit_FOUND)
         message(STATUS "cuBLAS found")
 
+        # Silence nvcc front-end diagnostic 177 (declared but never referenced) for CUDA sources only.
+        # SHELL: keeps "-Xcudafe <arg>" together so option de-duplication cannot drop a repeated -Xcudafe.
+        add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcudafe --diag_suppress=177>")
+
         enable_language(CUDA)
 
         add_compile_definitions(GGML_USE_LLAMAFILE)
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index ee4d6c8dd..384c2de7e 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -113,7 +113,7 @@
 // PDL host-side support (cudaLaunchKernelEx) requires CUDART >= 11.8 and excludes HIP/MUSA.
 // __CUDA_ARCH__  is undefined in host passes; GPU arch check happens in device-side code.
 #if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11080
-#    define GGML_CUDA_USE_PDL
+// #    define GGML_CUDA_USE_PDL  // KCPP fix: PDL is deliberately left disabled because enabling it also breaks the cu12.1 build
 #endif  // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11080
 
 static __device__ __forceinline__ void ggml_cuda_pdl_sync() {