diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index cd3e45694..2d90ec1ac 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -13530,7 +13530,7 @@ struct llm_build_exaone : public llm_graph_context {
 
 template <bool iswa>
 struct llm_build_exaone4 : public llm_graph_context {
-    llm_build_exaone4(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) {
+    llm_build_exaone4(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
         const int64_t n_embd_head = hparams.n_embd_head_k;
 
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_v);
@@ -13603,7 +13603,7 @@ struct llm_build_exaone4 : public llm_graph_context {
                 cb(Kcur, "Kcur", il);
                 cb(Vcur, "Vcur", il);
 
-                cur = build_attn(inp_attn, gf,
+                cur = build_attn(inp_attn,
                         model.layers[il].wo, NULL,
                         Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
                 cb(cur, "attn_out", il);
@@ -17352,9 +17352,9 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
         case LLM_ARCH_EXAONE4:
             {
                 if (hparams.swa_type == LLAMA_SWA_TYPE_STANDARD) {
-                    llm = std::make_unique<llm_build_exaone4<true>>(*this, params, gf);
+                    llm = std::make_unique<llm_build_exaone4<true>>(*this, params);
                 } else {
-                    llm = std::make_unique<llm_build_exaone4<false>>(*this, params, gf);
+                    llm = std::make_unique<llm_build_exaone4<false>>(*this, params);
                 }
             } break;
         case LLM_ARCH_RWKV6: