From 0460d92cc379faabac7a1e6a78324dcf808e1033 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 13 Mar 2025 20:28:26 +0800 Subject: [PATCH] disable context shifting for gemma3 --- gpttype_adapter.cpp | 6 ++++++ model_adapter.cpp | 4 ++++ model_adapter.h | 1 + 3 files changed, 11 insertions(+) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index e8b457764..afb4d0400 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1792,6 +1792,7 @@ void PurgeMissingTokens(llama_context * ctx, llama_context * draft_ctx, std::vec auto shared = LongestCommonSubseq(curr_ctx_without_memory, new_ctx_without_memory); + printf("\nSharedSize: %d, LCSTokThreshold: %d, ArrPass: %d\n",(int)shared.size(),LCSTokThreshold,ArrStartWith(new_ctx_without_memory, shared)); if (shared.size() > LCSTokThreshold && ArrStartWith(new_ctx_without_memory, shared)) // enough tokens in common { int found = ArrFindIndexOf(current_context_tokens,shared); @@ -2128,6 +2129,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in printf("Qwen2VL detected! Mrope will be used, and context shift will be disabled!\n"); kcpp_data->use_contextshift = false; } + if(file_format_meta.model_architecture == GGUFArch::ARCH_GEMMA3) + { + printf("Gemma3 detected! 
Context shift will be disabled!\n"); + kcpp_data->use_contextshift = false; + } model_params.main_gpu = cu_parseinfo_maindevice; #if defined(GGML_USE_CUDA) diff --git a/model_adapter.cpp b/model_adapter.cpp index 9cb8ac0f0..2abb0315f 100644 --- a/model_adapter.cpp +++ b/model_adapter.cpp @@ -321,6 +321,10 @@ void print_tok_vec(std::vector &embd) { fileformatmeta->model_architecture = GGUFArch::ARCH_QWEN2VL; } + else if(modelarch=="gemma3") + { + fileformatmeta->model_architecture = GGUFArch::ARCH_GEMMA3; + } else if(modelarch=="rwkv6") { fileformatmeta->model_architecture = GGUFArch::ARCH_RWKV; diff --git a/model_adapter.h b/model_adapter.h index c28bb0cf2..883e0b93f 100644 --- a/model_adapter.h +++ b/model_adapter.h @@ -60,6 +60,7 @@ enum GGUFArch ARCH_QWEN2 = 5, ARCH_RWKV = 6, ARCH_QWEN2VL = 7, + ARCH_GEMMA3 = 8, }; struct FileFormatExtraMeta