Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 17:14:36 +00:00)
Try to fix oldpc CUDA mode, broken without flash attention since upstream PR 14361 (landed between 1.94 and 1.95) (+1 squashed commit)
Squashed commits: [940f0c639] try fix oldpc cuda broken without flash attn since upstream pr14361 between 1.94 and 1.95
parent 4c1faf61b2
commit 487d509b44
2 changed files with 8 additions and 1 deletion
@@ -2061,6 +2061,13 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
     bool use_batched_cublas_bf16 = src0->type == GGML_TYPE_BF16 && bf16_mma_hardware_available(cc);
     bool use_batched_cublas_f32 = src0->type == GGML_TYPE_F32;
 
+    if(ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING)
+    {
+        //kcpp: https://github.com/ggml-org/llama.cpp/pull/14361 broke oldpc mode without this.
+        use_batched_cublas_bf16 = false;
+        use_batched_cublas_f32 = false;
+    }
+
     if (!split && use_mul_mat_vec_f) {
         // the custom F16 vector kernel can be used over batched cuBLAS GEMM
         // but this is only faster for GPUs without tensor cores or with a thin src0 matrix (particularly KQV in attention)
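In plain terms, the added guard forces the non-batched cuBLAS path whenever the build only targets Turing-or-older CUDA architectures, which is what koboldcpp's "oldpc" mode compiles for. The snippet below is a minimal standalone sketch of that decision, not the real ggml-cuda source: the names GGML_CUDA_CC_TURING, ggml_cuda_highest_compiled_arch and bf16_mma_hardware_available are taken from the diff, but their definitions here (including the assumed value 750 for Turing) are simplified stand-ins for illustration.

#include <cstdio>

// Assumption: Turing is identified by compute capability 7.5 (750), as in ggml's CC constants.
constexpr int GGML_CUDA_CC_TURING = 750;

// Simplified stand-ins for the real ggml helpers referenced in the diff.
static int  ggml_cuda_highest_compiled_arch(int cc) { return cc; }
static bool bf16_mma_hardware_available(int cc)     { return cc >= 800; }

// Mirrors the patched selection logic: start from the upstream defaults,
// then disable the batched cuBLAS BF16/F32 paths on Turing-or-older builds.
static void pick_mul_mat_path(int cc, bool src0_is_bf16, bool src0_is_f32) {
    bool use_batched_cublas_bf16 = src0_is_bf16 && bf16_mma_hardware_available(cc);
    bool use_batched_cublas_f32  = src0_is_f32;

    if (ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING) {
        // kcpp: PR 14361 broke oldpc mode without this fallback.
        use_batched_cublas_bf16 = false;
        use_batched_cublas_f32  = false;
    }

    std::printf("cc=%d batched_bf16=%d batched_f32=%d\n",
                cc, use_batched_cublas_bf16, use_batched_cublas_f32);
}

int main() {
    pick_mul_mat_path(750, false, true);  // Turing: batched cuBLAS disabled
    pick_mul_mat_path(860, true,  false); // Ampere: batched BF16 stays enabled
    return 0;
}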
@@ -63,7 +63,7 @@ dry_seq_break_max = 128
 extra_images_max = 4
 
 # global vars
-KcppVersion = "1.97.3"
+KcppVersion = "1.97.4"
 showdebug = True
 kcpp_instance = None #global running instance
 global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}