diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index bbfd5c792..20f25dc26 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -3059,6 +3059,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector & media_int
             }
             else
             {
+                media_composite_image_signature = ""; //force invalidate
                 printf("\nWarning: Vision Image excluded - Context size too low or not enough clip tokens! (needed %d)\nImage will be IGNORED! You probably want to relaunch with a larger context size!\n",cliptokensneeded);
             }
             media_objects[i].mediachunks.push_back(chunk);
@@ -3112,6 +3113,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector & media_int
             }
             else
             {
+                media_composite_image_signature = ""; //force invalidate
                 printf("\nWarning: Audio Embd excluded - Context size too low or not enough clip tokens! (needed %d)\nAudio will be IGNORED! You probably want to relaunch with a larger context size!\n",cliptokensneeded);
             }

@@ -3315,7 +3317,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         media_composite_image_signature = new_media_composite;
         if(debugmode==1 && !is_quiet)
         {
-            printf("\nLLAVA images changed, existing cache invalidated");
+            printf("\nAttached media changed, existing multimodal cache invalidated");
         }
         media_data_changed = true;
     }
@@ -3520,7 +3522,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     {
         if(last_media_mem.size() + kcpp_data->n_predict + 4 > nctx)
         {
-            printf("\nWarning: Too many LLaVA tokens, max context exceeded! They will be ignored!\n");
+            printf("\nWarning: Too many multimodal tokens, max context exceeded! They will be ignored!\n");
         }
         else
         {
@@ -4266,7 +4268,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             {
                 PrepareMediaEmbds(nctx, media_intro);
                 media_embds_built = true;
-                printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n");
+                printf("\nSomehow media embeds were not prepared (maybe no fast forward), rebuilding them...\n");
             }

             //if partial batch, dispatch existing first
@@ -4301,11 +4303,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                     auto evr = llama_decode(llama_ctx_v4, batch.batch);
                     if(evr!=0)
                     {
-                        printf("\nError when appending llava intro: %d\n",evr);
+                        printf("\nError when appending media intro: %d\n",evr);
                     }
                     else
                     {
-                        printf("\rProcessing LLaVa Intro (%d tokens)",introsize);
+                        printf("\rProcessing Media Intro (%d tokens)",introsize);
                     }
                     n_past += introsize;
                     llavatokensevaled += introsize;
@@ -4340,7 +4342,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 if(!err)
                 {
                     media_composite_image_signature = ""; //force invalidate
-                    fprintf(stderr, "\nFailed to eval llava image at %d!\n",n_past);
+                    fprintf(stderr, "\nFailed to eval media tokens at %d!\n",n_past);
                     output.text = nullptr;
                     output.status = 0;
                     output.prompt_tokens = output.completion_tokens = 0;
@@ -4370,7 +4372,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         if(llavatokenscounted!=llavatokensevaled)
         {
             media_composite_image_signature = ""; //force invalidate
-            fprintf(stderr, "\nLLAVA image tokens mismatch at %d! (%d vs %d tokens)\n",n_past,llavatokenscounted,llavatokensevaled);
+            fprintf(stderr, "\nMedia tokens mismatch at %d! (%d vs %d tokens)\n",n_past,llavatokenscounted,llavatokensevaled);
             output.text = nullptr;
             output.status = 0;
             output.prompt_tokens = output.completion_tokens = 0;
diff --git a/klite.embd b/klite.embd
index dfc542f56..2beefb51f 100644
--- a/klite.embd
+++ b/klite.embd
@@ -14734,11 +14734,11 @@ Current version indicated by LITEVER below.

    function self_upload_file_dispatch(data,filename)
    {
-        const maxSize = 20 * 1024 * 1024; // approx 20MB limit
+        const maxSize = 30 * 1024 * 1024; // approx 30MB limit
        const dlen = (data.length*0.75);
        const mbs = Math.ceil(dlen/1024/1024);
        if (dlen > maxSize)
        {
-            msgbox(`Selected file exceeds 20MB size limit!\nSelected file was ${mbs}MB. Please try a smaller file.`, "File Too Large");
+            msgbox(`Selected file exceeds 30MB size limit!\nSelected file was ${mbs}MB. Please try a smaller file.`, "File Too Large");
            return;
        }
diff --git a/koboldcpp.py b/koboldcpp.py
index b41e10655..0df5daa23 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -5124,7 +5124,7 @@ def show_gui():
     ctk.CTkButton(model_tab, width=70, text = "HF Search", command = model_searcher ).grid(row=1,column=0, stick="nw", padx=370, pady=2)
     makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=160,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
     makelabelentry(model_tab, "Multiplier: ", loramult_var, 3, 50,padx=390,singleline=True,tooltip="Scale multiplier for Text LoRA Strength. Default is 1.0", labelpadx=330)
-    makefileentry(model_tab, "Vision mmproj:", "Select Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select a mmproj file to use for vision models like LLaVA.\nLeave blank to skip.")
+    makefileentry(model_tab, "Mmproj File:", "Select Audio or Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select an mmproj file to use with multimodal models for vision and audio recognition.\nLeave blank to skip.")
     makecheckbox(model_tab, "Vision Force CPU", mmprojcpu_var, 9, tooltiptxt="Force CLIP for Vision mmproj always on CPU.")
     makelabelentry(model_tab, "Vision MaxRes:", visionmaxres_var, 9, padx=320, singleline=True, tooltip=f"Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default {default_visionmaxres}).", labelpadx=220)
     makefileentry(model_tab, "Draft Model:", "Select Speculative Text Model File", draftmodel_var, 11,width=280,singlerow=True,tooltiptxt="Select a draft text model file to use for speculative decoding.\nLeave blank to skip.")
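
Note on the klite.embd hunk: the upload guard treats data as a base64 string, so data.length*0.75 approximates the decoded byte count (base64 stores 3 bytes in every 4 characters), and that estimate is compared against the cap, raised here from 20MB to 30MB. Below is a minimal standalone sketch of the same check under those assumptions; the names checkUploadSize and MAX_UPLOAD_BYTES are hypothetical and not part of the patch.

// Hypothetical standalone sketch of the size guard used in self_upload_file_dispatch.
// A base64 string stores 3 bytes per 4 characters, so decoded size ~= length * 0.75.
const MAX_UPLOAD_BYTES = 30 * 1024 * 1024; // 30MB cap, matching the patched limit

function checkUploadSize(base64Data)
{
    const decodedBytes = base64Data.length * 0.75;            // approximate decoded size
    const decodedMB = Math.ceil(decodedBytes / 1024 / 1024);  // rounded up for the message
    if (decodedBytes > MAX_UPLOAD_BYTES)
    {
        return { ok: false, message: `File is about ${decodedMB}MB, over the 30MB limit.` };
    }
    return { ok: true, message: `File is about ${decodedMB}MB, within the 30MB limit.` };
}

// Example: a roughly 40MB payload (about 53 million base64 characters) is rejected.
console.log(checkUploadSize("A".repeat(53 * 1024 * 1024)).message);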