disable context shifting for gemma3

2025-09-10 00:54:41 +00:00 · 2025-03-13 20:28:26 +08:00 · 2025-03-13 20:28:26 +08:00 · 0460d92cc3
commit 0460d92cc3
parent ca698f0cbe
3 changed files with 11 additions and 0 deletions
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -1792,6 +1792,7 @@ void PurgeMissingTokens(llama_context * ctx, llama_context * draft_ctx, std::vec

    auto shared = LongestCommonSubseq(curr_ctx_without_memory, new_ctx_without_memory);

+    printf("\nSharedSize: %d, LCSTokThreshold: %d, ArrPass: %d\n",shared.size(),LCSTokThreshold,ArrStartWith(new_ctx_without_memory, shared));
    if (shared.size() > LCSTokThreshold && ArrStartWith(new_ctx_without_memory, shared)) // enough tokens in common
    {
        int found = ArrFindIndexOf(current_context_tokens,shared);
@@ -2128,6 +2129,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
            printf("Qwen2VL detected! Mrope will be used, and context shift will be disabled!\n");
            kcpp_data->use_contextshift = false;
        }
+        if(file_format_meta.model_architecture == GGUFArch::ARCH_GEMMA3)
+        {
+            printf("Gemma3 detected! Context shift will be disabled!\n");
+            kcpp_data->use_contextshift = false;
+        }
        model_params.main_gpu = cu_parseinfo_maindevice;

        #if defined(GGML_USE_CUDA)