mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 08:00:25 +00:00
update for cu13 builds (no ci will be provided)
This commit is contained in:
parent
8b5ebfcc8d
commit
1a4f54dd11
3 changed files with 13 additions and 1 deletions
|
|
@ -130,7 +130,10 @@ if (LLAMA_CUBLAS)
|
|||
# 75 == int8 tensor cores
|
||||
# 80 == Ampere, asynchronous data loading, faster tensor core instructions
|
||||
message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}")
|
||||
if(CUDAToolkit_VERSION VERSION_GREATER 12)
|
||||
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13)
|
||||
add_compile_definitions(GGML_CUDA_USE_GRAPHS) # try to enable CUDA graphs on cu13 builds (this branch is taken when CUDAToolkit_VERSION >= 13)
|
||||
set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-virtual") # lowest CUDA 13 standard
|
||||
elseif(CUDAToolkit_VERSION VERSION_GREATER 12)
|
||||
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
|
||||
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
|
||||
else()
|
||||
|
|
|
|||
6
Makefile
6
Makefile
|
|
@ -231,6 +231,12 @@ NVCCFLAGS += -Wno-deprecated-gpu-targets \
|
|||
-gencode arch=compute_75,code=compute_75 \
|
||||
-gencode arch=compute_80,code=compute_80
|
||||
|
||||
else ifdef LLAMA_ARCHES_CU13
|
||||
NVCCFLAGS += -Wno-deprecated-gpu-targets \
|
||||
-gencode arch=compute_75,code=compute_75 \
|
||||
-gencode arch=compute_80,code=compute_80 \
|
||||
-gencode arch=compute_86,code=compute_86
|
||||
|
||||
else
|
||||
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
|
||||
endif
|
||||
|
|
|
|||
|
|
@ -39,6 +39,9 @@ fi
|
|||
if [ -n "$ARCHES_CU12" ]; then
|
||||
ARCHES_FLAG="LLAMA_ARCHES_CU12=1"
|
||||
fi
|
||||
if [ -n "$ARCHES_CU13" ]; then
|
||||
ARCHES_FLAG="LLAMA_ARCHES_CU13=1"
|
||||
fi
|
||||
if [ -n "$NO_WMMA" ]; then
|
||||
NO_WMMA_FLAG="LLAMA_NO_WMMA=1"
|
||||
fi
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue