mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 11:00:40 +00:00
i hate doing this
This commit is contained in:
parent
c66371fbb0
commit
a7a4e238fb
2 changed files with 4 additions and 8 deletions
|
|
@ -126,14 +126,15 @@ if (LLAMA_CUBLAS)
|
|||
# 60 == f16 CUDA intrinsics
|
||||
# 61 == integer CUDA intrinsics
|
||||
# 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster
|
||||
# 75 == int8 tensor cores
|
||||
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
|
||||
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
|
||||
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
|
||||
else()
|
||||
message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}")
|
||||
if(CUDAToolkit_VERSION VERSION_GREATER 12)
|
||||
set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
|
||||
set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES "37;52;61;70") # toolkits not newer than CUDA 12: additionally target 37 + lowest for integer intrinsics
|
||||
set(CMAKE_CUDA_ARCHITECTURES "37;52;61;70;75") # toolkits not newer than CUDA 12: additionally target 37 + lowest for integer intrinsics
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
|||
|
|
@ -108,11 +108,6 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
|
|||
return false;
|
||||
}
|
||||
|
||||
if(g_mul_mat_q)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (cc < CC_OFFSET_AMD) {
|
||||
return cc < CC_VOLTA || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue