Patch notes (text before the first "diff --git" header is ignored by patch tools):
  * CMakeLists.txt: when cuBLAS is found, pass "-Xcudafe --diag_suppress=177" to
    CUDA compilations only (guarded by the COMPILE_LANGUAGE:CUDA generator
    expression so the nvcc-specific flag never reaches the C/C++ compilers).
  * ggml/src/ggml-cuda/common.cuh: comment out the GGML_CUDA_USE_PDL define; per
    the inline KCPP note, PDL breaks the cu12.1 build.
  NOTE(review): intra-line indentation and blank context lines were reconstructed
  from the hunk line counts -- confirm against the target tree before applying.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 97d36eb7a..599fee389 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,6 +96,8 @@ if (LLAMA_CUBLAS)
     if (CUDAToolkit_FOUND)
         message(STATUS "cuBLAS found")
 
+        add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe;--diag_suppress=177>")
+
         enable_language(CUDA)
 
         add_compile_definitions(GGML_USE_LLAMAFILE)
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index ee4d6c8dd..384c2de7e 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -113,7 +113,7 @@
 // PDL host-side support (cudaLaunchKernelEx) requires CUDART >= 11.8 and excludes HIP/MUSA.
 // __CUDA_ARCH__ is undefined in host passes; GPU arch check happens in device-side code.
 #if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11080
-#    define GGML_CUDA_USE_PDL
+// #    define GGML_CUDA_USE_PDL //KCPP fix: do not use PDL as it breaks cu12.1 build too
 #endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11080
 
 static __device__ __forceinline__ void ggml_cuda_pdl_sync() {