diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 20f25dc26..145290d98 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -585,6 +585,7 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
     draft_ctx_params.offload_kqv = base_ctx_params.offload_kqv;
     draft_model_params.main_gpu = base_model_params.main_gpu;
     draft_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
+    draft_ctx_params.kv_unified = base_ctx_params.kv_unified;
     #if defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN)
     bool ts_all_zero = true;
     for (int i = 0; i < tensor_split_max; ++i) {
@@ -2183,6 +2184,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         }
 
         llama_ctx_params.offload_kqv = !inputs.low_vram;
+        llama_ctx_params.kv_unified = true;
         model_params.use_mmap = inputs.use_mmap;
         model_params.use_mlock = inputs.use_mlock;
         model_params.n_gpu_layers = inputs.gpulayers;
diff --git a/klite.embd b/klite.embd
index 8b3c2c0f9..3f6862127 100644
--- a/klite.embd
+++ b/klite.embd
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
 -->
 
 <script id="init-config">
-	const LITEVER = 264;
+	const LITEVER = 265;
 	const urlParams = new URLSearchParams(window.location.search);
 	var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
 	const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -5996,16 +5996,28 @@ Current version indicated by LITEVER below.
 				const matchedlw = match.match(/^[ \t]*/);
 				const leadingWhitespace = matchedlw ? matchedlw[0] : '';
 				content = unescape_html(content);
+				if(content.match(/^\${1,}$/)) //only dollar signs, just return
+				{
+					return match;
+				}
 				return leadingWhitespace + temml.renderToString(content); // render LaTeX content
 			});
 			input = input.replace(/(?:^|[^\\])\$(\S[^$\n]*?\S)\$(?!\d)/g, (match, p1) => {
 				let content = p1;
 				content = unescape_html(content);
+				if(content.match(/^\${1,}$/)) //unescaped capture consists solely of dollar signs, so hand back the matched text untouched instead of rendering it
+				{
+					return match;
+				}
 				return " "+temml.renderToString(content); // render LaTeX content
 			});
 			input = input.replace(/(^\\begin\{math\}\n([\s\S]*?)\n\\end\{math\}$|^\\begin\{equation\}\n([\s\S]*?)\n\\end\{equation\}$)/gm, (match, p1, p2, p3) => { //match math eqns
 				let content = p2 || p3;
 				content = unescape_html(content);
+				if(content.match(/^\${1,}$/)) //unescaped environment body consists solely of dollar signs, so hand back the matched text untouched instead of rendering it
+				{
+					return match;
+				}
 				return temml.renderToString(content); // render LaTeX content
 			});
 			return input;
diff --git a/otherarch/embeddings_adapter.cpp b/otherarch/embeddings_adapter.cpp
index 41dc4c4e5..daad79ab6 100644
--- a/otherarch/embeddings_adapter.cpp
+++ b/otherarch/embeddings_adapter.cpp
@@ -135,6 +135,7 @@ bool embeddingstype_load_model(const embeddings_load_model_inputs inputs)
     ctx_params.n_threads = nthreads;
     ctx_params.n_threads_batch = nthreads;
     ctx_params.flash_attn = inputs.flash_attention;
+    ctx_params.kv_unified = true;
 
     embeddings_ctx = llama_init_from_model(embeddingsmodel, ctx_params);
 
diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp
index 9d9776d09..1d9a5c9eb 100644
--- a/otherarch/sdcpp/sdtype_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@@ -288,6 +288,7 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
 
     if (sd_ctx == NULL) {
         printf("\nError: KCPP SD Failed to create context!\nIf using Flux/SD3.5, make sure you have ALL files required (e.g. VAE, T5, Clip...) or baked in!\n");
+        printf("Otherwise, if you are using GGUF format, you can try the original .safetensors instead (Comfy GGUF not supported)\n");
         return false;
     }
 
diff --git a/otherarch/tts_adapter.cpp b/otherarch/tts_adapter.cpp
index 8072f376d..56aa605b6 100644
--- a/otherarch/tts_adapter.cpp
+++ b/otherarch/tts_adapter.cpp
@@ -539,6 +539,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
     tts_ctx_params.n_threads = nthreads;
     tts_ctx_params.n_threads_batch = nthreads;
     tts_ctx_params.flash_attn = inputs.flash_attention;
+    tts_ctx_params.kv_unified = true;
 
     llama_model * ttcmodel = llama_model_load_from_file(modelfile_ttc.c_str(), tts_model_params);
     ttc_ctx = llama_init_from_model(ttcmodel, tts_ctx_params);