From 6c4cbdc70b83ac054106e9de3ebc2ecaa82c4b1f Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Mon, 25 May 2026 16:46:23 +0800 Subject: [PATCH] server: MTP layer kv-cache should respect draft type ctk (#23646) --- tools/server/server-context.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 9fecc4247..ae9e0bf60 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -822,6 +822,8 @@ private: auto cparams_dft = common_context_params_to_llama(params_dft); if (spec_mtp) { cparams_dft.ctx_type = LLAMA_CONTEXT_TYPE_MTP; + cparams_dft.type_k = params_base.speculative.draft.cache_type_k; + cparams_dft.type_v = params_base.speculative.draft.cache_type_v; } cparams_dft.n_rs_seq = 0; @@ -940,6 +942,8 @@ private: auto cparams_mtp = common_context_params_to_llama(params_base); cparams_mtp.ctx_type = LLAMA_CONTEXT_TYPE_MTP; + cparams_mtp.type_k = params_base.speculative.draft.cache_type_k; + cparams_mtp.type_v = params_base.speculative.draft.cache_type_v; cparams_mtp.n_rs_seq = 0; ctx_dft.reset(llama_init_from_model(model_tgt, cparams_mtp));