CUDA: restrict PDL to CTK >= 12.3 due to MSVC issues (#23742)

This commit is contained in:
Oliver Simons 2026-05-27 14:21:04 +02:00 committed by GitHub
parent 2d0656fbdd
commit fda8528aa8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -110,11 +110,14 @@
# define GGML_CUDA_USE_CUB
#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
// PDL host-side support (cudaLaunchKernelEx) requires CUDART >= 11.8 and excludes HIP/MUSA.
// PDL host-side support (cudaLaunchKernelEx) requires CUDART >= 11.8.
// However, it is buggy in MSVC (non-clang) builds with CTK < 12.3, so those
// builds only enable PDL from CTK 12.3 onward; see
// https://github.com/ggml-org/llama.cpp/pull/22522#discussion_r3302393293
// __CUDA_ARCH__ is undefined in host passes; GPU arch check happens in device-side code.
#if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11080
#if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && \
(CUDART_VERSION >= 12030 || (!(defined(_MSC_VER) && !defined(__clang__)) && CUDART_VERSION >= 11080))
# define GGML_CUDA_USE_PDL
#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11080
#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && (CUDART_VERSION >= 12030 || (!(defined(_MSC_VER) && !defined(__clang__)) && CUDART_VERSION >= 11080))
static __device__ __forceinline__ void ggml_cuda_pdl_sync() {
#if defined(GGML_CUDA_USE_PDL) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= GGML_CUDA_CC_HOPPER