From f0564f9caf753934ffe20ee2ac3f8a917cb2592c Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 17 Jul 2025 00:11:08 +0800
Subject: [PATCH] updated lite, added better separators for multimodal chunks
 (universal)

---
 gpttype_adapter.cpp   |  50 +++++++----
 klite.embd            | 203 ++++++++++++++++++++++++++----------------
 otherarch/otherarch.h |   2 +
 3 files changed, 163 insertions(+), 92 deletions(-)
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index f253e5fc2..bbfd5c792 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -3009,13 +3009,12 @@ int GetThreadsToUse(bool blasmode)
 }
 
 //this function prepares the clip embds for llava. it's only needed when images change
-static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep, const std::vector<int> & media_intro)
+static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_intro)
 {
     bool vision_on = (clp_ctx_v != nullptr && clp_img_data != nullptr);
     bool audio_on = (clp_ctx_a != nullptr);
     if (vision_on || audio_on)
     {
-        int sepsize = media_sep.size();
         int introsize = media_intro.size();
         last_media_mem.clear();
 
@@ -3048,7 +3047,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep
                     int cliptokensneeded = chunk.clp_image_tokens;
                     if(cliptokensneeded>0 && cliptokensneeded < nctx)
                     {
-                        int tokcnt = (i==0?(chunk.clp_image_tokens):(chunk.clp_image_tokens+sepsize));
+                        int tokcnt = (chunk.clp_image_tokens + media_objects[i].chunk_start_seq.size() + media_objects[i].chunk_end_seq.size());
                         if(i==0)
                         {
                             tokcnt += introsize;
@@ -3101,7 +3100,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep
                 int cliptokensneeded = total_chunk_tokens;
                 if(cliptokensneeded>0 && cliptokensneeded < nctx)
                 {
-                    int tokcnt = (i==0?(cliptokensneeded):(cliptokensneeded+sepsize));
+                    int tokcnt = (cliptokensneeded + media_objects[i].chunk_start_seq.size() + media_objects[i].chunk_end_seq.size());
                     if(i==0)
                     {
                         tokcnt += introsize;
@@ -3289,6 +3288,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             media_object lv;
             lv.b64data = item;
             lv.is_audio = false;
+            TokenizeString("<image>", lv.chunk_start_seq, file_format, false);
+            TokenizeString("</image>\n\n", lv.chunk_end_seq, file_format, false);
             media_objects.push_back(lv);
             new_media_composite += item;
         }
@@ -3301,6 +3302,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             media_object lv;
             lv.b64data = item;
             lv.is_audio = true;
+            TokenizeString("<audio>", lv.chunk_start_seq, file_format, false);
+            TokenizeString("</audio>\n\n", lv.chunk_end_seq, file_format, false);
             media_objects.push_back(lv);
             new_media_composite += item;
         }
@@ -3473,8 +3476,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     // tokenize the prompt
     std::vector<int> embd_inp;
     std::vector<int> embd_inp_mem; //for storing added memory
-    std::vector<int> media_sep; //to separate between different llava images
-    std::vector<int> media_intro; //to separate between different llava images
+    std::vector<int> media_intro; //added before media list
     std::vector<int> guidance_embd; //holds the guidance prompt
     bool media_embds_built = false;
 
@@ -3482,7 +3484,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
 
     TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
     bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
-    TokenizeString("\n\n", media_sep, file_format, false);
     TokenizeString("\nAttached Media:\n", media_intro, file_format, false);
 
     if(media_composite_image_signature=="")
@@ -3491,7 +3492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     }
     if(media_data_changed)
     {
-        PrepareMediaEmbds(nctx, media_sep, media_intro);
+        PrepareMediaEmbds(nctx, media_intro);
         media_embds_built = true;
     }
 
@@ -4263,7 +4264,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 {
                     if(!media_embds_built) //this should never happen! however, handle it anyway
                     {
-                        PrepareMediaEmbds(nctx, media_sep, media_intro);
+                        PrepareMediaEmbds(nctx, media_intro);
                         media_embds_built = true;
                         printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n");
                     }
@@ -4278,7 +4279,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                         //batch is empty, do image processing
                         int llavatokenscounted = 0;
                         int llavatokensevaled = 0;
-                        int sepsize = media_sep.size();
                         int introsize = media_intro.size();
                         while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==MEDIA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==MEDIA_TOKEN_IDENTIFIER_B))
                         {
@@ -4310,10 +4310,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                                 n_past += introsize;
                                 llavatokensevaled += introsize;
                             }
-                            if(sepsize>0 && i>0)
-                            {
+
+                            int start_size = media_objects[i].chunk_start_seq.size();
+                            if (start_size > 0) {
                                 //add a separator between each image
-                                kcpp_embd_batch batch = kcpp_embd_batch(media_sep, n_past, use_mrope, false);
+                                kcpp_embd_batch batch = kcpp_embd_batch(media_objects[i].chunk_start_seq, n_past, use_mrope, false);
                                 auto evr = llama_decode(llama_ctx_v4, batch.batch);
                                 if(evr!=0)
                                 {
@@ -4321,10 +4322,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                                 }
                                 else
                                 {
-                                    printf("\rProcessing Media Separator (%d tokens)",sepsize);
+                                    printf("\rProcessing Media Start Separator (%d tokens)",start_size);
                                 }
-                                n_past += sepsize;
-                                llavatokensevaled += sepsize;
+                                n_past += start_size;
+                                llavatokensevaled += start_size;
                             }
 
                             for(int j=0;j<media_objects[i].mediachunks.size();++j)
@@ -4348,6 +4349,23 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                                     return output;
                                 }
                             }
+
+                            int end_size = media_objects[i].chunk_end_seq.size();
+                            if (end_size > 0) {
+                                //append this media object's closing tag sequence after its chunks
+                                kcpp_embd_batch batch = kcpp_embd_batch(media_objects[i].chunk_end_seq, n_past, use_mrope, false);
+                                auto evr = llama_decode(llama_ctx_v4, batch.batch);
+                                if(evr!=0)
+                                {
+                                    printf("\nError when appending media separator: %d\n",evr);
+                                }
+                                else
+                                {
+                                    printf("\rProcessing Media End Separator (%d tokens)",end_size);
+                                }
+                                n_past += end_size;
+                                llavatokensevaled += end_size;
+                            }
                         }
                         if(llavatokenscounted!=llavatokensevaled)
                         {
diff --git a/klite.embd b/klite.embd
index 0852fc41f..121cdf1de 100644
--- a/klite.embd
+++ b/klite.embd
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
 -->
 
 <script id="init-config">
-	const LITEVER = 262;
+	const LITEVER = 263;
 	const urlParams = new URLSearchParams(window.location.search);
 	var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
 	const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3146,7 +3146,7 @@ Current version indicated by LITEVER below.
 	var selected_models = []; //this stores ALL selected models properties as array of objects
 	var worker_data = [];
 	var selected_workers = [];
-	//gametext_arr stores images inline, with the special format [<|p|id|p|>] or [<|d|id|d|>], which is either an ID for loaded image data, or an ID for pending requests
+	//gametext_arr stores media inline, with the special format [<|p|id|p|>] (an ID for a pending request) or [<|h|hash|h|>] (a hash keying the loaded media data in completed_imgs_meta)
 	var gametext_arr = []; //array of texts currently displayed
 	var redo_arr = []; //array of texts that are in the redo stack
 	var retry_prev_text = []; //when we retry, save the last 3 versions in case they want to undo
@@ -3181,7 +3181,6 @@ Current version indicated by LITEVER below.
 	var image_db = {}; //stores a dictionary of pending images
 	var interrogation_db = {};
 	var completed_imgs_meta = {}; //stores temp info on completed images like alt text
-	var img_hash_to_b64_lookup = {}; //used to revert imghash to b64. temporary storage
 	var data_hash_to_blob_lookup = {}; //used for temporary blob storage, such as with embedded audio
 	//key is ID, body is {done:false,queue:10,result:""}
 	var stablemodels = [{"name": "stable_diffusion","count": 1}]; //stored as {name,count}
@@ -7454,7 +7453,7 @@ Current version indicated by LITEVER below.
 		let export_arr_no_img = [];
 		let export_hashes = {};
 		for (let i = 0; i < gametext_arr.length; ++i) {
-			export_arr_no_img.push(gametext_arr[i].replace(/\[<\|p\|.+?\|p\|>\]/g, "").replace(/\[<\|d\|.+?\|d\|>\]/g, ""));
+			export_arr_no_img.push(gametext_arr[i].replace(/\[<\|p\|.+?\|p\|>\]/g, "").replace(/\[<\|h\|.+?\|h\|>\]/g, ""));
 		}
 		if(!save_images)
 		{
@@ -7464,12 +7463,11 @@ Current version indicated by LITEVER below.
 		{
 			//bake used image metas into savefile
 			for (let i = 0; i < gametext_arr.length; ++i) {
-				let matches = gametext_arr[i].match(/\[<\|d\|.+?\|d\|>\]/g);
-				for(let m in matches)
-				{
-					let inner = matches[m].substring(5, matches[m].length - 5);
-					let imghash = cyrb_hash(inner);
-					if (completed_imgs_meta[imghash] != null) {
+				let matches = gametext_arr[i].matchAll(/\[<\|h\|(.+?)\|h\|>\]/g);
+				for (const match of matches) {
+					let imghash = match[1];
+					if (completed_imgs_meta[imghash] != null)
+					{
 						export_hashes[imghash] = completed_imgs_meta[imghash];
 					}
 				}
@@ -8028,7 +8026,16 @@ Current version indicated by LITEVER below.
 				{
 					for (var key in storyobj.completed_imgs_meta)
 					{
+						let oldb64 = "";
+						if(completed_imgs_meta[key] && completed_imgs_meta[key].data)
+						{
+							oldb64 = completed_imgs_meta[key].data;
+						}
 						completed_imgs_meta[key] = storyobj.completed_imgs_meta[key];
+						if(completed_imgs_meta[key] && !completed_imgs_meta[key].data && oldb64)
+						{
+							completed_imgs_meta[key].data = oldb64;
+						}
 						if(completed_imgs_meta[key] && completed_imgs_meta[key].visionmode==4)
 						{
 							completed_imgs_meta[key].visionmode = 3; //todo: temporary backwards compat, to be removed.
@@ -10261,7 +10268,7 @@ Current version indicated by LITEVER below.
 			{
 				document.getElementById("oairoledropdown").value = localsettings.saved_oai_role;
 			}
-			if(document.getElementById("customapidropdown").value==7) //mistral api supports prefill
+			if(document.getElementById("customapidropdown").value==7 || (document.getElementById("customapidropdown").value==2 && document.getElementById("custom_oai_endpoint").value.includes(".moonshot."))) //mistral api supports prefill
 			{
 				document.getElementById("oaiemulatecompletionsbox").classList.remove("hidden");
 			}
@@ -10386,9 +10393,11 @@ Current version indicated by LITEVER below.
 		|| dropdown.value.includes("text-davinci-001") || dropdown.value.includes("gpt-3.5-turbo-instruct") || dropdown.value == "davinci");
 		if(autotoggle_check)
 		{
+			document.getElementById("useoaichatcompl").disabled = false;
 			if(ep_should_always_use_chat_completions() || dropdown.selectedIndex==dropdown.options.length-1)
 			{
 				document.getElementById("useoaichatcompl").checked = true;
+				document.getElementById("useoaichatcompl").disabled = true;
 			} else if (document.getElementById("custom_oai_endpoint").value.toLowerCase().includes("featherless.ai")) {
 				document.getElementById("useoaichatcompl").checked = false; //use completions for a better experience
 			} else {
@@ -10564,10 +10573,10 @@ Current version indicated by LITEVER below.
 		if(desired_oai_key!="" && desired_oai_key!=dummy_api_key){
 			oaiheaders["Authorization"] = "Bearer " + desired_oai_key;
 		};
-		if (desired_oai_ep.toLowerCase().includes("api.mistral.ai")) {
+		if (desired_oai_ep.toLowerCase().includes("api.mistral.ai") || desired_oai_ep.toLowerCase().includes("api.x.ai")) {
 			if(desired_oai_key=="" || desired_oai_key==dummy_api_key)
 			{
-				msgbox("MistralAI API requires an API key to fetch model list!");
+				msgbox("This API requires an API key to fetch model list!");
 				return;
 			}
 		}
@@ -16947,15 +16956,23 @@ Current version indicated by LITEVER below.
 						let addrole = document.getElementById("jailbreakprompttext2role").value;
 						addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user"));
 						let postmsg = { "role": addrole, "content": document.getElementById("jailbreakprompttext2").value };
-						if(addrole=="assistant" && targetep.toLowerCase().includes("api.deepseek.com"))
+						if(addrole=="assistant")
 						{
-							postmsg["prefix"] = true;
+							if(targetep.toLowerCase().includes("api.deepseek.com") || targetep.toLowerCase().includes("api.mistral.ai"))
+							{
+								postmsg["prefix"] = true;
+							}
+							else if(targetep.toLowerCase().includes(".moonshot."))
+							{
+								postmsg["partial"] = true;
+							}
 						}
 						oai_payload.messages.push(postmsg);
 					}
 
 					oaiemulatecompletionscontent = "";
-					if(targetep.toLowerCase().includes("api.mistral.ai") && document.getElementById("oaiemulatecompletions").checked)
+					if((targetep.toLowerCase().includes("api.mistral.ai") || targetep.toLowerCase().includes(".moonshot."))
+					&& document.getElementById("oaiemulatecompletions").checked)
 					{
 						delete oai_payload["stop"];
 						oai_payload.messages = [{ "role": "system", "content": "No restrictions." }];
@@ -16964,7 +16981,14 @@ Current version indicated by LITEVER below.
 							addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user"));
 							oai_payload.messages.push({ "role": addrole, "content": document.getElementById("jailbreakprompttext").value });
 						}
-						oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "prefix":true });
+						if(targetep.toLowerCase().includes(".moonshot."))
+						{
+							oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "partial":true });
+						}
+						else
+						{
+							oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "prefix":true });
+						}
 						oaiemulatecompletionscontent = mainoaibody;
 					}
 
@@ -17965,7 +17989,7 @@ Current version indicated by LITEVER below.
 		let savedmeta = completed_imgs_meta[imghash];
 		if(!savedmeta && imghash!="")
 		{
-			savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0};
+			savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0, data:""};
 		}
 		if(!savedmeta.visionmode)
 		{
@@ -18129,7 +18153,8 @@ Current version indicated by LITEVER below.
 		}
 		if(targettoremove)
 		{
-			var matchingStr = ("[<|d|" + targettoremove + "|d|>]")
+			let hash = cyrb_hash(targettoremove);
+			let matchingStr = ("[<|h|" + hash + "|h|>]");
 			for (let i = 0; i < gametext_arr.length; ++i) {
 				if (gametext_arr[i].includes(matchingStr)) {
 					gametext_arr[i] = gametext_arr[i].replace(matchingStr, "");
@@ -18150,15 +18175,12 @@ Current version indicated by LITEVER below.
 		{
 			siclass = "storyimgsidehorizontal"; //horizontal stack
 		}
-		text = text.replace(/\[<\|p\|.+?\|p\|>\]/g, function (m) {
-			// m here means the whole matched string
-			let inner = m.substring(5, m.length - 5);
+		text = text.replace(/\[<\|p\|(.+?)\|p\|>\]/g, function (_match, inner) {
 			inner = render_media_html("", inner, siclass);
 			return inner;
 		});
-		text = text.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
-			// m here means the whole matched string
-			let inner = m.substring(5, m.length - 5);
+
+		text = text.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
 			inner = render_media_html(inner, "", siclass);
 			return inner;
 		});
@@ -18166,22 +18188,31 @@ Current version indicated by LITEVER below.
 		return text;
 	}
 
-	function render_media_html(data, pend_txt = "", siclass="storyimgfloat")
+	function render_media_html(hash, pend_txt = "", siclass="storyimgfloat")
 	{
+		//if it's a meta reference, retrieve actual data
+		let data = "";
+		if(hash!="")
+		{
+			if(completed_imgs_meta[hash] != null && completed_imgs_meta[hash].data)
+			{
+				data = completed_imgs_meta[hash].data;
+			}
+		}
 		if(data.startsWith("data:audio"))
 		{
-			return render_audio_html(data);
+			return render_audio_html(hash, data);
 		}
 		else //also handles ALL pending items
 		{
-			return render_image_html(data, pend_txt, siclass);
+			return render_image_html(hash, data, pend_txt, siclass);
 		}
 		return "";
 	}
 
-	function render_audio_html(data)
+	function render_audio_html(hash, data)
 	{
-		let audiohash = cyrb_hash(data).trim();
+		let audiohash = hash.trim();
 		let audioblob = b64_to_persistent_blob(data,audiohash);
 		let filename = "";
 		let len = 0;
@@ -18195,7 +18226,7 @@ Current version indicated by LITEVER below.
 		return str;
 	}
 
-	function render_image_html(data, pend_txt = "", siclass="storyimgfloat") {
+	function render_image_html(hash, data, pend_txt = "", siclass="storyimgfloat") {
 		var dim = PREVIEW_RES_PX; //image preview. adventure mode has smaller pictures
 		dimW = dim;
 		dimH = dim;
@@ -18213,7 +18244,7 @@ Current version indicated by LITEVER below.
 
 			return `<div class="${siclass}${reinvertcolor}" contenteditable="false"><img src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEASABIAAD/2wBDABsSFBcUERsXFhceHBsgKEIrKCUlKFE6PTBCYFVlZF9VXVtqeJmBanGQc1tdhbWGkJ6jq62rZ4C8ybqmx5moq6T/2wBDARweHigjKE4rK06kbl1upKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKT/wAARCAEAAQADASIAAhEBAxEB/8QAGQABAQEBAQEAAAAAAAAAAAAAAAEDAgQF/8QAIBABAAIBBQEBAQEAAAAAAAAAAAECEgMRMVKRIWFBof/EABQBAQAAAAAAAAAAAAAAAAAAAAD/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIRAxEAPwD7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABETPENNPT3je3jUHm22HpmInljqUx+xwDgAAAAAAAAAAAAAAAAAAAAAAAAABaxvaIRaztaJB6AAEmN4mFSZ2iZB5wAAAAAAAAAAAAAAAAAAAAAAAAAAAaaeptG1vWrzETMcSD0zMRyx1L5fI4cb7gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7rpzNd/HAAAAAAAAAAAAAAAAAAAAAAAAAAAAADTT09/s8Gnp7/Z4agONSmX2OXYDzDbUpl9jliAAAAAAAAAAAAAAAAAAAsVmd9o4KVm0/jeIiI2gHnGupp/2vjIAABpp6e/2TT09/s8NQAAAAHGpTL7HLsB5htqUy+xyxAAAAAAAAAAAAAAAWlZtP4UrNp/G8RFY2gCIiI2hQAZ6mn/a+NAHmaaenv8AZ4dzp1m2/wDjoAAAAAAAABxqUy+xy7AeYbalMvscsQAAAAAAAAAAFpWbT+FKzafxvEREbQBEREbQoAAAAAAAAAAAAAAAAAONSmX2OXYDzDbUpl9jliAAAAAAAtKzafxaVm0/jaIiI2gCIiI2hQAAAAAAAAAAAAAAAAAAAAAcalMvscuwHmG2pTL7HLEAAAAFi0xxMwZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6ZW7T6gC5W7T6kzvyAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/Z" width=${dim} height=${dim} style="border-radius: 6%;" title="${alttxt}" alt="${pend_txt}"><div class=\"imgloader\"></div><div class=\"imagelabel\">${waittime}</div></div>`;
 		} else {
-			let imghash = cyrb_hash(data).trim();
+			let imghash = hash.trim();
 			if (completed_imgs_meta[imghash] != null) {
 				alttxt = completed_imgs_meta[imghash].prompt?escape_html(completed_imgs_meta[imghash].prompt):"";
 				if(completed_imgs_meta[imghash].aspect==1) //portrait
@@ -18568,7 +18599,7 @@ Current version indicated by LITEVER below.
 			{
 				const pat = /<t2i>(.*?)<\/t2i>/g;
 				gentxtspeak = gentxtspeak.replace(pat, "");
-				const pat2 = /{{\[IMG_.{1,8}_REF\]}}/g;
+				const pat2 = /{{\[DAT_.{1,8}_REF\]}}/g;
 				gentxtspeak = gentxtspeak.replace(pat2, "");
 			}
 
@@ -18776,13 +18807,13 @@ Current version indicated by LITEVER below.
 						hasChangedImage = true; //set here to update timers
 						if (img.done == true && img.result != "") {
 							needToSave = true;
-							let newstr = "[<|d|" + img.result + "|d|>]";
+							let metaid = cyrb_hash(img.result);
+							let newstr = `[<|h|${metaid}|h|>]`;
 							console.log("Replacing with Image: " + matchstr);
 							gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
-							let metaid = cyrb_hash(img.result);
 							//default to llava if supported, and image is self uploaded
 							let desiredvismode = ((image_db[key].imsource==1 && ((is_using_kcpp_with_vision() && image_db[key].type==0) || (is_using_kcpp_with_audio() && image_db[key].type==1)))?3:0);
-							completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:desiredvismode, aspect:image_db[key].aspect, ref:image_db[key].imrefid, len:image_db[key].len, type:image_db[key].type};
+							completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:desiredvismode, aspect:image_db[key].aspect, ref:image_db[key].imrefid, len:image_db[key].len, type:image_db[key].type, data:img.result};
 							delete image_db[key];
 						}
 					}
@@ -19465,6 +19496,17 @@ Current version indicated by LITEVER below.
 								for (var i = 0; i < tmpstory.actions.length; ++i) {
 									gametext_arr.push(tmpstory.actions[i]);
 								}
+								//handle updated image metadata
+								if(tmpstory.completed_imgs_meta)
+								{
+									for (var key in tmpstory.completed_imgs_meta)
+									{
+										if(!completed_imgs_meta[key])
+										{
+											completed_imgs_meta[key] = tmpstory.completed_imgs_meta[key];
+										}
+									}
+								}
 								render_gametext(false);
 							}
 							else
@@ -19749,29 +19791,25 @@ Current version indicated by LITEVER below.
 			}
 			return `<span class=\"color_pink\">`+m+`</span>`;
 		});
-		text = text.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
-			let inner = m.substring(5, m.length - 5);
-			let imghash = cyrb_hash(inner);
-			img_hash_to_b64_lookup[imghash] = m;
-			let hashtag = `{{[IMG_${imghash}_REF]}}`;
-			if(!addspan)
-			{
+		text = text.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
+			let hashtag = `{{[DAT_${inner}_REF]}}`;
+			if (!addspan) {
 				return hashtag;
 			}
-			return `<span class=\"color_pink\">${hashtag}</span>`;
+			return `<span class="color_pink">${hashtag}</span>`;
 		});
 		return text;
 	}
 
 	function unstash_image_placeholders(text)
 	{
-		return text.replace(/{{\[IMG_.{1,8}_REF\]}}/g, function (m) {
+		return text.replace(/{{\[DAT_.{1,8}_REF\]}}/g, function (m) {
 			let imghash = m.substring(7, m.length - 7);
 			if(!imghash)
 			{
 				return m;
 			}
-			let unstash = img_hash_to_b64_lookup[imghash];
+			let unstash = `[<|h|${imghash}|h|>]`;
 			if(!unstash)
 			{
 				return m;
@@ -19795,18 +19833,7 @@ Current version indicated by LITEVER below.
 				retry_preserve_last = false;
 				redo_prev_text = [];
 
-				//stash images
-				gametext_elem.querySelectorAll('div.storyimgcenter,div.storyimgsidevertical,div.storyimgsidehorizontal,div.storyimgfloat').forEach(
-					(el) => {
-						let chimg = el.getElementsByTagName("img")[0];
-						if(el && chimg)
-						{
-							el.replaceWith((chimg.alt == null || chimg.alt == "") ? ("[<|d|" + chimg.src + "|d|>]") : ("[<|p|" + chimg.alt + "|p|>]"))
-						}
-					}
-				);
-
-				//replace b64 image placeholders
+				//convert stashed {{[DAT_hash_REF]}} placeholders back to the stored [<|h|hash|h|>] format
 				gametext_elem.innerHTML = unstash_image_placeholders(gametext_elem.innerHTML);
 
 				let editedChunks = []; //use to count chunk lengths before merging
@@ -19816,7 +19843,6 @@ Current version indicated by LITEVER below.
 					}
 				);
 
-
 				//strip chunks (optimize for firefox by not constantly modifying dom)
 				let htmlstr = gametext_elem.innerHTML;
 				htmlstr = htmlstr.replace(/<span class="(.+?)">(.+?)<\/span>/g, "$2");
@@ -19829,11 +19855,6 @@ Current version indicated by LITEVER below.
 				//rather than dump it all into one history, let's split it into paragraphs
 				let fullmergedstory = gametext_elem.innerText;
 
-				//if it ends with a single newline, remove it to avoid ghost newlines
-				if (fullmergedstory.endsWith("\n") && !fullmergedstory.endsWith("\n\n")) {
-					fullmergedstory = fullmergedstory.slice(0, -1);
-				}
-
 				let newestChunk = "";
 				if(editedChunks.length>1) //split by chunk lengths in reverse order, we only want the newest
 				{
@@ -19846,6 +19867,19 @@ Current version indicated by LITEVER below.
 					}
 				}
 
+				//if it ends with a single newline, remove it to avoid ghost newlines
+				if (newestChunk) {
+					if (newestChunk.endsWith("\n") && !newestChunk.endsWith("\n\n")) {
+						newestChunk = newestChunk.slice(0, -1);
+					}
+				}
+				else
+				{
+					if (fullmergedstory.endsWith("\n") && !fullmergedstory.endsWith("\n\n")) {
+						fullmergedstory = fullmergedstory.slice(0, -1);
+					}
+				}
+
 				//split by newlines for the rest
 				if(fullmergedstory.length>0)
 				{
@@ -19906,7 +19940,7 @@ Current version indicated by LITEVER below.
 			fulltxt = fulltxt.replace(/\[&lt;\|p\|.+?\|p\|&gt;\]/g, function (m) {
 				return unescape_html(m);
 			});
-			fulltxt = fulltxt.replace(/\[&lt;\|d\|.+?\|d\|&gt;\]/g, function (m) {
+			fulltxt = fulltxt.replace(/\[&lt;\|h\|.+?\|h\|&gt;\]/g, function (m) {
 				return unescape_html(m) ;
 			});
 			fulltxt = fulltxt.replace(/\[&lt;\|.+?\|&gt;\]/g, function (m) {
@@ -19950,12 +19984,11 @@ Current version indicated by LITEVER below.
 			{
 				insertAIVisionImages = []; //a bit hacky
 				insertAIAudioSounds = [];
-				fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
-					// m here means the whole matched string
-					let inner = m.substring(5, m.length - 5);
-					let imghash = cyrb_hash(inner);
+				fulltxt = fulltxt.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
+					let imghash = inner;
 					let foundmeta = completed_imgs_meta[imghash];
 					if (foundmeta != null) {
+						let data = foundmeta.data;
 						if(foundmeta.desc && (foundmeta.visionmode==1||foundmeta.visionmode==2))
 						{
 							return "\n(Attached Image: " + foundmeta.desc + ")\n";
@@ -19963,14 +19996,14 @@ Current version indicated by LITEVER below.
 						else if(foundmeta.visionmode==3)
 						{
 							let placeholder = "";
-							let parts = inner.split(',');
+							let parts = data.split(',');
 							if (parts.length === 2 && parts[0].startsWith('data:image')) {
-								insertAIVisionImages.push(inner);
+								insertAIVisionImages.push(data);
 								placeholder = "\n(Attached Image)\n";
 							}
 							else if(parts.length === 2 && parts[0].startsWith('data:audio'))
 							{
-								insertAIAudioSounds.push(inner);
+								insertAIAudioSounds.push(data);
 								placeholder = "\n(Attached Audio)\n";
 							}
 							return placeholder;
@@ -19980,7 +20013,7 @@ Current version indicated by LITEVER below.
 				});
 			}
 			fulltxt = fulltxt.replace(/\[<\|p\|.+?\|p\|>\]/g, stripimg_replace_str);
-			fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, stripimg_replace_str);
+			fulltxt = fulltxt.replace(/\[<\|h\|.+?\|h\|>\]/g, stripimg_replace_str);
 
 			//always filter comments - new format
 			fulltxt = fulltxt.replace(/\[<\|[\s\S]+?\|>\]/g, ""); //remove normal comments too
@@ -19992,11 +20025,12 @@ Current version indicated by LITEVER below.
 	function migrate_old_images_in_gametext()
 	{
 		let oldctx = concat_gametext(false, "", "", "", false);
+		let mustMigrate = false;
 		//if we have no new images
 		if (!(/\[<\|p\|.+?\|p\|>\]/.test(oldctx)) && !(/\[<\|d\|.+?\|d\|>\]/.test(oldctx))) {
 			//but we also have old images
 			if ((/<\|p\|.+?\|p\|>/.test(oldctx)) || (/<\|d\|.+?\|d\|>/.test(oldctx))) {
-
+				mustMigrate = true;
 				console.log("Migrating old images from saved story");
 				for (let i = 0; i < gametext_arr.length; ++i) {
 					gametext_arr[i] = gametext_arr[i].replace(/<\|p\|.+?\|p\|>/g, function (m) {
@@ -20008,6 +20042,23 @@ Current version indicated by LITEVER below.
 				}
 			}
 		}
+
+		//now, migrate all unhashed inline images into their final placeholder form
+		if(mustMigrate || (/\[<\|d\|.+?\|d\|>\]/.test(oldctx)))
+		{
+			console.log("Migrating old images 2 from saved story");
+			for (let i = 0; i < gametext_arr.length; ++i) {
+				gametext_arr[i] = gametext_arr[i].replace(/\[<\|d\|(.+?)\|d\|>\]/g, function (match, p1) {
+					let imghash = cyrb_hash(p1);
+					if(!completed_imgs_meta[imghash])
+					{
+						completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0, data: ""};
+					}
+					completed_imgs_meta[imghash].data = p1;
+					return `[<|h|${imghash}|h|>]`;
+				});
+			}
+		}
 	}
 
 	function update_pending_stream_displays()
@@ -21417,10 +21468,10 @@ Current version indicated by LITEVER below.
 		}
 
 		//a quick fix that adds a newline if there's none before opponent chat and a picture
-		var othernamesregexreplace = new RegExp("\\|[d|p]\\|>(?!" + localsettings.chatname + ").+?\\: ", "gi");
+		var othernamesregexreplace = new RegExp("\\|[h|p]\\|>](?!" + localsettings.chatname + ").+?\\: ", "gi");
 
 		input = input.replace(othernamesregexreplace, function (m) {
-			let rep = m.substring(0,4) + "\n" + m.substring(4);
+			let rep = m.substring(0,5) + "\n" + m.substring(5);
 			return rep;
 		});
 
diff --git a/otherarch/otherarch.h b/otherarch/otherarch.h
index 456fd808e..511c3dafb 100644
--- a/otherarch/otherarch.h
+++ b/otherarch/otherarch.h
@@ -512,6 +512,8 @@ struct media_object
     std::string b64data = "";
     std::vector<media_chunk> mediachunks;
     bool is_audio = false; //if true its audio, otherwise its vision
+    std::vector<int> chunk_start_seq;
+    std::vector<int> chunk_end_seq;
 };
 
 struct speculative_draft_result