From e47477fd4d776f07b414ce845ebea6e4172a7e96 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 28 Jul 2024 17:27:09 +0800
Subject: [PATCH] don't build rope factors from
 https://github.com/ggerganov/llama.cpp/pull/8676 for CLBlast as it segfaults

---
 src/llama.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 8ab9b14a3..7aa1405ce 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5921,7 +5921,7 @@ static bool llm_load_tensors(
 #if defined(GGML_USE_CLBLAST)
     if(clblast_offload_fallback_mode)
     {
-        printf("\nOpenCL GPU Offload Fallback...");
+        printf("\nOpenCL GPU Offload Fallback...\n");
         clblast_offload_fallback_layers = n_gpu_layers;
         i_gpu_start = std::max((int64_t) hparams.n_layer, (int64_t) 0);
     }
@@ -8784,7 +8784,11 @@ struct llm_build_context {
             // self-attention
             {
                 // rope freq factors for llama3; may return nullptr for llama2 and other models
+                #if defined(GGML_USE_CLBLAST)
+                struct ggml_tensor * rope_factors = nullptr; //clblast does not work with rope_factors
+                #else
                 struct ggml_tensor * rope_factors = build_rope_factors(il);
+                #endif
 
                 // compute Q and K and RoPE them
                 struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur);
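
For reference, here is the compile-time fallback pattern from the second hunk in
isolation. This is a minimal standalone sketch, not code from the patch: the
get_rope_factors() helper is a hypothetical stand-in for llama.cpp's
build_rope_factors(il). Compiling with -DGGML_USE_CLBLAST pins rope_factors to
nullptr so the downstream RoPE ops never receive the tensor that crashes the
CLBlast backend; without the define, the normal lookup path is used.

    #include <cstdio>

    struct ggml_tensor;  // opaque stand-in; the sketch never dereferences it

    // Hypothetical helper standing in for llm_build_context::build_rope_factors(il).
    static ggml_tensor * get_rope_factors(int il) {
        (void) il;
        return nullptr;  // real code would return the per-layer freq-factor tensor
    }

    int main() {
    #if defined(GGML_USE_CLBLAST)
        // CLBlast path: skip rope factors entirely, since passing them segfaults.
        ggml_tensor * rope_factors = nullptr;
    #else
        // All other backends: fetch the tensor as usual (it may still be null
        // for models without llama3-style rope scaling).
        ggml_tensor * rope_factors = get_rope_factors(0);
    #endif
        std::printf("rope_factors: %s\n", rope_factors ? "set" : "nullptr");
        return 0;
    }

Building the sketch both ways (e.g. "g++ sketch.cpp" vs.
"g++ -DGGML_USE_CLBLAST sketch.cpp") shows the two branches; the preprocessor
guard removes the problematic code at compile time rather than adding a runtime
check, matching how the patch above handles it.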