diff --git a/koboldcpp.py b/koboldcpp.py
index 62982788e..7c68c48cb 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1092,7 +1092,6 @@ def load_model(model_filename):
     if args.quantkv>0:
         inputs.quant_k = inputs.quant_v = args.quantkv
         inputs.flash_attention = True
-        inputs.use_contextshift = 0
     else:
         inputs.quant_k = inputs.quant_v = 0
     inputs.blasbatchsize = args.blasbatchsize
@@ -3682,7 +3681,7 @@ def show_gui():
             fastforward.set(1)
             smartcontextbox.grid_remove()

-        if contextshift.get()==0 and flashattention.get()==1:
+        if flashattention.get()==1:
             qkvslider.grid()
             qkvlabel.grid()
             noqkvlabel.grid_remove()
@@ -3692,7 +3691,7 @@ def show_gui():
             noqkvlabel.grid()

     def toggleflashattn(a,b,c):
-        if contextshift.get()==0 and flashattention.get()==1:
+        if flashattention.get()==1:
             qkvslider.grid()
             qkvlabel.grid()
             noqkvlabel.grid_remove()
@@ -3906,7 +3905,7 @@ def show_gui():
                 item.grid_remove()
     makecheckbox(tokens_tab, "Custom RoPE Config", variable=customrope_var, row=22, command=togglerope,tooltiptxt="Override the default RoPE configuration with custom RoPE scaling.")
     makecheckbox(tokens_tab, "Use FlashAttention", flashattention, 28, command=toggleflashattn, tooltiptxt="Enable flash attention for GGUF models.")
-    noqkvlabel = makelabel(tokens_tab,"Requirments Not Met",31,0,"Requires FlashAttention ENABLED and ContextShift DISABLED.")
+    noqkvlabel = makelabel(tokens_tab,"Requirments Not Met",31,0,"Requires FlashAttention ENABLED.")
     noqkvlabel.configure(text_color="#ff5555")
     qkvslider,qkvlabel,qkvtitle = makeslider(tokens_tab, "Quantize KV Cache:", quantkv_text, quantkv_var, 0, 2, 30, set=0,tooltip="Enable quantization of KV cache.\nRequires FlashAttention and disables ContextShift.")
     makecheckbox(tokens_tab, "No BOS Token", nobostoken_var, 33, tooltiptxt="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.")
@@ -4109,7 +4108,7 @@ def show_gui():
         args.quiet = quietmode.get()==1
         args.nocertify = nocertifymode.get()==1
         args.nomodel = nomodel.get()==1
-        if contextshift.get()==0 and flashattention.get()==1:
+        if flashattention.get()==1:
             args.quantkv = quantkv_var.get()
         else:
             args.quantkv = 0
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index cf9181cc3..5e4d8be2b 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -720,7 +720,7 @@ void llama_context::kv_self_update() {
             printf("\nWARNING: The current context does not support K-shift!\n");
         }
         else {
-            LLAMA_LOG_DEBUG("%s: applying K-shift\n", __func__);
+            // LLAMA_LOG_DEBUG("%s: applying K-shift\n", __func__);

             // apply K-shift if needed
             if (model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {
@@ -774,7 +774,7 @@ void llama_context::kv_self_update() {

     // reserve a worst case graph if needed
     if (need_reserve) {
-        LLAMA_LOG_DEBUG("%s: reserving a worst case graph\n", __func__);
+        // LLAMA_LOG_DEBUG("%s: reserving a worst case graph\n", __func__);

         // build worst-case graph
         uint32_t n_seqs = 1; // TODO: worst-case number of sequences