Update for CUDA 13 (cu13) builds (no CI will be provided)

2026-05-19 08:00:25 +00:00 · 2025-09-26 16:01:43 +08:00 · 2025-09-26 16:01:43 +08:00 · 1a4f54dd11
commit 1a4f54dd11
parent 8b5ebfcc8d
3 changed files with 13 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -130,7 +130,10 @@ if (LLAMA_CUBLAS)
        # 75 == int8 tensor cores
        # 80 == Ampere, asynchronous data loading, faster tensor core instructions
        message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}")
-        if(CUDAToolkit_VERSION VERSION_GREATER 12)
+        if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13)
+                add_compile_definitions(GGML_CUDA_USE_GRAPHS) # try to enable CUDA graphs on cu13 builds
+                set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-virtual") # lowest CUDA 13 standard
+        elseif(CUDAToolkit_VERSION VERSION_GREATER 12)
                add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
                set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
        else()
--- a/Makefile
+++ b/Makefile
@@ -231,6 +231,12 @@ NVCCFLAGS += -Wno-deprecated-gpu-targets \
             -gencode arch=compute_75,code=compute_75 \
             -gencode arch=compute_80,code=compute_80

+else ifdef LLAMA_ARCHES_CU13
+NVCCFLAGS += -Wno-deprecated-gpu-targets \
+             -gencode arch=compute_75,code=compute_75 \
+             -gencode arch=compute_80,code=compute_80 \
+             -gencode arch=compute_86,code=compute_86
+
 else
 NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
 endif
--- a/koboldcpp.sh
+++ b/koboldcpp.sh
@@ -39,6 +39,9 @@ fi
 if [ -n "$ARCHES_CU12" ]; then
 	ARCHES_FLAG="LLAMA_ARCHES_CU12=1"
 fi
+if [ -n "$ARCHES_CU13" ]; then
+	ARCHES_FLAG="LLAMA_ARCHES_CU13=1"
+fi
 if [ -n "$NO_WMMA" ]; then
 	NO_WMMA_FLAG="LLAMA_NO_WMMA=1"
 fi