updated lite, added better separators for multimodal chunks (universal)

This commit is contained in:
Concedo 2025-07-17 00:11:08 +08:00
parent 2a59adce0f
commit f0564f9caf
3 changed files with 163 additions and 92 deletions

View file

@ -3009,13 +3009,12 @@ int GetThreadsToUse(bool blasmode)
}
//this function prepares the clip embds for llava. it's only needed when images change
static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep, const std::vector<int> & media_intro)
static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_intro)
{
bool vision_on = (clp_ctx_v != nullptr && clp_img_data != nullptr);
bool audio_on = (clp_ctx_a != nullptr);
if (vision_on || audio_on)
{
int sepsize = media_sep.size();
int introsize = media_intro.size();
last_media_mem.clear();
@ -3048,7 +3047,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep
int cliptokensneeded = chunk.clp_image_tokens;
if(cliptokensneeded>0 && cliptokensneeded < nctx)
{
int tokcnt = (i==0?(chunk.clp_image_tokens):(chunk.clp_image_tokens+sepsize));
int tokcnt = (chunk.clp_image_tokens + media_objects[i].chunk_start_seq.size() + media_objects[i].chunk_end_seq.size());
if(i==0)
{
tokcnt += introsize;
@ -3101,7 +3100,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep
int cliptokensneeded = total_chunk_tokens;
if(cliptokensneeded>0 && cliptokensneeded < nctx)
{
int tokcnt = (i==0?(cliptokensneeded):(cliptokensneeded+sepsize));
int tokcnt = (cliptokensneeded + media_objects[i].chunk_start_seq.size() + media_objects[i].chunk_end_seq.size());
if(i==0)
{
tokcnt += introsize;
@ -3289,6 +3288,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
media_object lv;
lv.b64data = item;
lv.is_audio = false;
TokenizeString("<image>", lv.chunk_start_seq, file_format, false);
TokenizeString("</image>\n\n", lv.chunk_end_seq, file_format, false);
media_objects.push_back(lv);
new_media_composite += item;
}
@ -3301,6 +3302,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
media_object lv;
lv.b64data = item;
lv.is_audio = true;
TokenizeString("<audio>", lv.chunk_start_seq, file_format, false);
TokenizeString("</audio>\n\n", lv.chunk_end_seq, file_format, false);
media_objects.push_back(lv);
new_media_composite += item;
}
@ -3473,8 +3476,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
// tokenize the prompt
std::vector<int> embd_inp;
std::vector<int> embd_inp_mem; //for storing added memory
std::vector<int> media_sep; //to separate between different llava images
std::vector<int> media_intro; //to separate between different llava images
std::vector<int> media_intro; //added before media list
std::vector<int> guidance_embd; //holds the guidance prompt
bool media_embds_built = false;
@ -3482,7 +3484,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
TokenizeString("\n\n", media_sep, file_format, false);
TokenizeString("\nAttached Media:\n", media_intro, file_format, false);
if(media_composite_image_signature=="")
@ -3491,7 +3492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
}
if(media_data_changed)
{
PrepareMediaEmbds(nctx, media_sep, media_intro);
PrepareMediaEmbds(nctx, media_intro);
media_embds_built = true;
}
@ -4263,7 +4264,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
{
if(!media_embds_built) //this should never happen! however, handle it anyway
{
PrepareMediaEmbds(nctx, media_sep, media_intro);
PrepareMediaEmbds(nctx, media_intro);
media_embds_built = true;
printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n");
}
@ -4278,7 +4279,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
//batch is empty, do image processing
int llavatokenscounted = 0;
int llavatokensevaled = 0;
int sepsize = media_sep.size();
int introsize = media_intro.size();
while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==MEDIA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==MEDIA_TOKEN_IDENTIFIER_B))
{
@ -4310,10 +4310,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
n_past += introsize;
llavatokensevaled += introsize;
}
if(sepsize>0 && i>0)
{
int start_size = media_objects[i].chunk_start_seq.size();
if (start_size > 0) {
//add a separator between each image
kcpp_embd_batch batch = kcpp_embd_batch(media_sep, n_past, use_mrope, false);
kcpp_embd_batch batch = kcpp_embd_batch(media_objects[i].chunk_start_seq, n_past, use_mrope, false);
auto evr = llama_decode(llama_ctx_v4, batch.batch);
if(evr!=0)
{
@ -4321,10 +4322,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
}
else
{
printf("\rProcessing Media Separator (%d tokens)",sepsize);
printf("\rProcessing Media Start Separator (%d tokens)",start_size);
}
n_past += sepsize;
llavatokensevaled += sepsize;
n_past += start_size;
llavatokensevaled += start_size;
}
for(int j=0;j<media_objects[i].mediachunks.size();++j)
@ -4348,6 +4349,23 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
return output;
}
}
//evaluate the closing separator tokens (chunk_end_seq) belonging to this media object
int end_size = media_objects[i].chunk_end_seq.size();
if (end_size > 0) {
//decode the end separator as a plain token batch positioned at the current n_past
kcpp_embd_batch batch = kcpp_embd_batch(media_objects[i].chunk_end_seq, n_past, use_mrope, false);
auto evr = llama_decode(llama_ctx_v4, batch.batch);
if(evr!=0)
{
printf("\nError when appending media separator: %d\n",evr);
}
else
{
printf("\rProcessing Media End Separator (%d tokens)",end_size);
}
//NOTE(review): both counters advance even when llama_decode returned an error above - confirm this is intended
n_past += end_size;
llavatokensevaled += end_size;
}
}
if(llavatokenscounted!=llavatokensevaled)
{

View file

@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
-->
<script id="init-config">
const LITEVER = 262;
const LITEVER = 263;
const urlParams = new URLSearchParams(window.location.search);
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@ -3146,7 +3146,7 @@ Current version indicated by LITEVER below.
var selected_models = []; //this stores ALL selected models properties as array of objects
var worker_data = [];
var selected_workers = [];
//gametext_arr stores images inline, with the special format [<|p|id|p|>] or [<|d|id|d|>], which is either an ID for loaded image data, or an ID for pending requests
//gametext_arr stores images inline, with the special format [<|p|id|p|>] or [<|h|hash|h|>], which is either a hash for loaded media data, or an ID for pending requests
var gametext_arr = []; //array of texts currently displayed
var redo_arr = []; //array of texts that are in the redo stack
var retry_prev_text = []; //when we retry, save the last 3 versions in case they want to undo
@ -3181,7 +3181,6 @@ Current version indicated by LITEVER below.
var image_db = {}; //stores a dictionary of pending images
var interrogation_db = {};
var completed_imgs_meta = {}; //stores temp info on completed images like alt text
var img_hash_to_b64_lookup = {}; //used to revert imghash to b64. temporary storage
var data_hash_to_blob_lookup = {}; //used for temporary blob storage, such as with embedded audio
//key is ID, body is {done:false,queue:10,result:""}
var stablemodels = [{"name": "stable_diffusion","count": 1}]; //stored as {name,count}
@ -7454,7 +7453,7 @@ Current version indicated by LITEVER below.
let export_arr_no_img = [];
let export_hashes = {};
for (let i = 0; i < gametext_arr.length; ++i) {
export_arr_no_img.push(gametext_arr[i].replace(/\[<\|p\|.+?\|p\|>\]/g, "").replace(/\[<\|d\|.+?\|d\|>\]/g, ""));
export_arr_no_img.push(gametext_arr[i].replace(/\[<\|p\|.+?\|p\|>\]/g, "").replace(/\[<\|h\|.+?\|h\|>\]/g, ""));
}
if(!save_images)
{
@ -7464,12 +7463,11 @@ Current version indicated by LITEVER below.
{
//bake used image metas into savefile
for (let i = 0; i < gametext_arr.length; ++i) {
let matches = gametext_arr[i].match(/\[<\|d\|.+?\|d\|>\]/g);
for(let m in matches)
{
let inner = matches[m].substring(5, matches[m].length - 5);
let imghash = cyrb_hash(inner);
if (completed_imgs_meta[imghash] != null) {
let matches = gametext_arr[i].matchAll(/\[<\|h\|(.+?)\|h\|>\]/g);
for (const match of matches) {
let imghash = match[1];
if (completed_imgs_meta[imghash] != null)
{
export_hashes[imghash] = completed_imgs_meta[imghash];
}
}
@ -8028,7 +8026,16 @@ Current version indicated by LITEVER below.
{
for (var key in storyobj.completed_imgs_meta)
{
let oldb64 = "";
if(completed_imgs_meta[key] && completed_imgs_meta[key].data)
{
oldb64 = completed_imgs_meta[key].data;
}
completed_imgs_meta[key] = storyobj.completed_imgs_meta[key];
if(completed_imgs_meta[key] && !completed_imgs_meta[key].data && oldb64)
{
completed_imgs_meta[key].data = oldb64;
}
if(completed_imgs_meta[key] && completed_imgs_meta[key].visionmode==4)
{
completed_imgs_meta[key].visionmode = 3; //todo: temporary backwards compat, to be removed.
@ -10261,7 +10268,7 @@ Current version indicated by LITEVER below.
{
document.getElementById("oairoledropdown").value = localsettings.saved_oai_role;
}
if(document.getElementById("customapidropdown").value==7) //mistral api supports prefill
if(document.getElementById("customapidropdown").value==7 || (document.getElementById("customapidropdown").value==2 && document.getElementById("custom_oai_endpoint").value.includes(".moonshot."))) //mistral api supports prefill
{
document.getElementById("oaiemulatecompletionsbox").classList.remove("hidden");
}
@ -10386,9 +10393,11 @@ Current version indicated by LITEVER below.
|| dropdown.value.includes("text-davinci-001") || dropdown.value.includes("gpt-3.5-turbo-instruct") || dropdown.value == "davinci");
if(autotoggle_check)
{
document.getElementById("useoaichatcompl").disabled = false;
if(ep_should_always_use_chat_completions() || dropdown.selectedIndex==dropdown.options.length-1)
{
document.getElementById("useoaichatcompl").checked = true;
document.getElementById("useoaichatcompl").disabled = true;
} else if (document.getElementById("custom_oai_endpoint").value.toLowerCase().includes("featherless.ai")) {
document.getElementById("useoaichatcompl").checked = false; //use completions for a better experience
} else {
@ -10564,10 +10573,10 @@ Current version indicated by LITEVER below.
if(desired_oai_key!="" && desired_oai_key!=dummy_api_key){
oaiheaders["Authorization"] = "Bearer " + desired_oai_key;
};
if (desired_oai_ep.toLowerCase().includes("api.mistral.ai")) {
if (desired_oai_ep.toLowerCase().includes("api.mistral.ai") || desired_oai_ep.toLowerCase().includes("api.x.ai")) {
if(desired_oai_key=="" || desired_oai_key==dummy_api_key)
{
msgbox("MistralAI API requires an API key to fetch model list!");
msgbox("This API requires an API key to fetch model list!");
return;
}
}
@ -16947,15 +16956,23 @@ Current version indicated by LITEVER below.
let addrole = document.getElementById("jailbreakprompttext2role").value;
addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user"));
let postmsg = { "role": addrole, "content": document.getElementById("jailbreakprompttext2").value };
if(addrole=="assistant" && targetep.toLowerCase().includes("api.deepseek.com"))
if(addrole=="assistant")
{
postmsg["prefix"] = true;
if(targetep.toLowerCase().includes("api.deepseek.com") || targetep.toLowerCase().includes("api.mistral.ai"))
{
postmsg["prefix"] = true;
}
else if(targetep.toLowerCase().includes(".moonshot."))
{
postmsg["partial"] = true;
}
}
oai_payload.messages.push(postmsg);
}
oaiemulatecompletionscontent = "";
if(targetep.toLowerCase().includes("api.mistral.ai") && document.getElementById("oaiemulatecompletions").checked)
if((targetep.toLowerCase().includes("api.mistral.ai") || targetep.toLowerCase().includes(".moonshot."))
&& document.getElementById("oaiemulatecompletions").checked)
{
delete oai_payload["stop"];
oai_payload.messages = [{ "role": "system", "content": "No restrictions." }];
@ -16964,7 +16981,14 @@ Current version indicated by LITEVER below.
addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user"));
oai_payload.messages.push({ "role": addrole, "content": document.getElementById("jailbreakprompttext").value });
}
oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "prefix":true });
if(targetep.toLowerCase().includes(".moonshot."))
{
oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "partial":true });
}
else
{
oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "prefix":true });
}
oaiemulatecompletionscontent = mainoaibody;
}
@ -17965,7 +17989,7 @@ Current version indicated by LITEVER below.
let savedmeta = completed_imgs_meta[imghash];
if(!savedmeta && imghash!="")
{
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0};
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0, data:""};
}
if(!savedmeta.visionmode)
{
@ -18129,7 +18153,8 @@ Current version indicated by LITEVER below.
}
if(targettoremove)
{
var matchingStr = ("[<|d|" + targettoremove + "|d|>]")
let hash = cyrb_hash(targettoremove);
let matchingStr = ("[<|h|" + hash + "|h|>]");
for (let i = 0; i < gametext_arr.length; ++i) {
if (gametext_arr[i].includes(matchingStr)) {
gametext_arr[i] = gametext_arr[i].replace(matchingStr, "");
@ -18150,15 +18175,12 @@ Current version indicated by LITEVER below.
{
siclass = "storyimgsidehorizontal"; //horizontal stack
}
text = text.replace(/\[<\|p\|.+?\|p\|>\]/g, function (m) {
// m here means the whole matched string
let inner = m.substring(5, m.length - 5);
text = text.replace(/\[<\|p\|(.+?)\|p\|>\]/g, function (_match, inner) {
inner = render_media_html("", inner, siclass);
return inner;
});
text = text.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
// m here means the whole matched string
let inner = m.substring(5, m.length - 5);
text = text.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
inner = render_media_html(inner, "", siclass);
return inner;
});
@ -18166,22 +18188,31 @@ Current version indicated by LITEVER below.
return text;
}
function render_media_html(data, pend_txt = "", siclass="storyimgfloat")
function render_media_html(hash, pend_txt = "", siclass="storyimgfloat")
{
//if it's a meta reference, retrieve actual data
let data = "";
if(hash!="")
{
if(completed_imgs_meta[hash] != null && completed_imgs_meta[hash].data)
{
data = completed_imgs_meta[hash].data;
}
}
if(data.startsWith("data:audio"))
{
return render_audio_html(data);
return render_audio_html(hash, data);
}
else //also handles ALL pending items
{
return render_image_html(data, pend_txt, siclass);
return render_image_html(hash, data, pend_txt, siclass);
}
return "";
}
function render_audio_html(data)
function render_audio_html(hash, data)
{
let audiohash = cyrb_hash(data).trim();
let audiohash = hash.trim();
let audioblob = b64_to_persistent_blob(data,audiohash);
let filename = "";
let len = 0;
@ -18195,7 +18226,7 @@ Current version indicated by LITEVER below.
return str;
}
function render_image_html(data, pend_txt = "", siclass="storyimgfloat") {
function render_image_html(hash, data, pend_txt = "", siclass="storyimgfloat") {
var dim = PREVIEW_RES_PX; //image preview. adventure mode has smaller pictures
dimW = dim;
dimH = dim;
@ -18213,7 +18244,7 @@ Current version indicated by LITEVER below.
return `<div class="${siclass}${reinvertcolor}" contenteditable="false"><img src="" width=${dim} height=${dim} style="border-radius: 6%;" title="${alttxt}" alt="${pend_txt}"><div class=\"imgloader\"></div><div class=\"imagelabel\">${waittime}</div></div>`;
} else {
let imghash = cyrb_hash(data).trim();
let imghash = hash.trim();
if (completed_imgs_meta[imghash] != null) {
alttxt = completed_imgs_meta[imghash].prompt?escape_html(completed_imgs_meta[imghash].prompt):"";
if(completed_imgs_meta[imghash].aspect==1) //portrait
@ -18568,7 +18599,7 @@ Current version indicated by LITEVER below.
{
const pat = /<t2i>(.*?)<\/t2i>/g;
gentxtspeak = gentxtspeak.replace(pat, "");
const pat2 = /{{\[IMG_.{1,8}_REF\]}}/g;
const pat2 = /{{\[DAT_.{1,8}_REF\]}}/g;
gentxtspeak = gentxtspeak.replace(pat2, "");
}
@ -18776,13 +18807,13 @@ Current version indicated by LITEVER below.
hasChangedImage = true; //set here to update timers
if (img.done == true && img.result != "") {
needToSave = true;
let newstr = "[<|d|" + img.result + "|d|>]";
let metaid = cyrb_hash(img.result);
let newstr = `[<|h|${metaid}|h|>]`;
console.log("Replacing with Image: " + matchstr);
gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
let metaid = cyrb_hash(img.result);
//default to llava if supported, and image is self uploaded
let desiredvismode = ((image_db[key].imsource==1 && ((is_using_kcpp_with_vision() && image_db[key].type==0) || (is_using_kcpp_with_audio() && image_db[key].type==1)))?3:0);
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:desiredvismode, aspect:image_db[key].aspect, ref:image_db[key].imrefid, len:image_db[key].len, type:image_db[key].type};
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:desiredvismode, aspect:image_db[key].aspect, ref:image_db[key].imrefid, len:image_db[key].len, type:image_db[key].type, data:img.result};
delete image_db[key];
}
}
@ -19465,6 +19496,17 @@ Current version indicated by LITEVER below.
for (var i = 0; i < tmpstory.actions.length; ++i) {
gametext_arr.push(tmpstory.actions[i]);
}
//handle updated image metadata
//copy entries from the incoming story's completed_imgs_meta into ours,
//but only for keys we do not already hold - existing local entries are left untouched
if(tmpstory.completed_imgs_meta)
{
for (var key in tmpstory.completed_imgs_meta)
{
if(!completed_imgs_meta[key])
{
completed_imgs_meta[key] = tmpstory.completed_imgs_meta[key];
}
}
}
render_gametext(false);
}
else
@ -19749,29 +19791,25 @@ Current version indicated by LITEVER below.
}
return `<span class=\"color_pink\">`+m+`</span>`;
});
text = text.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
let inner = m.substring(5, m.length - 5);
let imghash = cyrb_hash(inner);
img_hash_to_b64_lookup[imghash] = m;
let hashtag = `{{[IMG_${imghash}_REF]}}`;
if(!addspan)
{
text = text.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
let hashtag = `{{[DAT_${inner}_REF]}}`;
if (!addspan) {
return hashtag;
}
return `<span class=\"color_pink\">${hashtag}</span>`;
return `<span class="color_pink">${hashtag}</span>`;
});
return text;
}
function unstash_image_placeholders(text)
{
return text.replace(/{{\[IMG_.{1,8}_REF\]}}/g, function (m) {
return text.replace(/{{\[DAT_.{1,8}_REF\]}}/g, function (m) {
let imghash = m.substring(7, m.length - 7);
if(!imghash)
{
return m;
}
let unstash = img_hash_to_b64_lookup[imghash];
let unstash = `[<|h|${imghash}|h|>]`;
if(!unstash)
{
return m;
@ -19795,18 +19833,7 @@ Current version indicated by LITEVER below.
retry_preserve_last = false;
redo_prev_text = [];
//stash images
gametext_elem.querySelectorAll('div.storyimgcenter,div.storyimgsidevertical,div.storyimgsidehorizontal,div.storyimgfloat').forEach(
(el) => {
let chimg = el.getElementsByTagName("img")[0];
if(el && chimg)
{
el.replaceWith((chimg.alt == null || chimg.alt == "") ? ("[<|d|" + chimg.src + "|d|>]") : ("[<|p|" + chimg.alt + "|p|>]"))
}
}
);
//replace b64 image placeholders
//replace b64 image placeholders back to the actual stored format
gametext_elem.innerHTML = unstash_image_placeholders(gametext_elem.innerHTML);
let editedChunks = []; //use to count chunk lengths before merging
@ -19816,7 +19843,6 @@ Current version indicated by LITEVER below.
}
);
//strip chunks (optimize for firefox by not constantly modifying dom)
let htmlstr = gametext_elem.innerHTML;
htmlstr = htmlstr.replace(/<span class="(.+?)">(.+?)<\/span>/g, "$2");
@ -19829,11 +19855,6 @@ Current version indicated by LITEVER below.
//rather than dump it all into one history, let's split it into paragraphs
let fullmergedstory = gametext_elem.innerText;
//if it ends with a single newline, remove it to avoid ghost newlines
if (fullmergedstory.endsWith("\n") && !fullmergedstory.endsWith("\n\n")) {
fullmergedstory = fullmergedstory.slice(0, -1);
}
let newestChunk = "";
if(editedChunks.length>1) //split by chunk lengths in reverse order, we only want the newest
{
@ -19846,6 +19867,19 @@ Current version indicated by LITEVER below.
}
}
//if it ends with a single newline, remove it to avoid ghost newlines
//a trailing double newline is kept as-is; only exactly one trailing "\n" is stripped
//when newestChunk is non-empty the trim is applied to it, otherwise to fullmergedstory
if (newestChunk) {
if (newestChunk.endsWith("\n") && !newestChunk.endsWith("\n\n")) {
newestChunk = newestChunk.slice(0, -1);
}
}
else
{
if (fullmergedstory.endsWith("\n") && !fullmergedstory.endsWith("\n\n")) {
fullmergedstory = fullmergedstory.slice(0, -1);
}
}
//split by newlines for the rest
if(fullmergedstory.length>0)
{
@ -19906,7 +19940,7 @@ Current version indicated by LITEVER below.
fulltxt = fulltxt.replace(/\[&lt;\|p\|.+?\|p\|&gt;\]/g, function (m) {
return unescape_html(m);
});
fulltxt = fulltxt.replace(/\[&lt;\|d\|.+?\|d\|&gt;\]/g, function (m) {
fulltxt = fulltxt.replace(/\[&lt;\|h\|.+?\|h\|&gt;\]/g, function (m) {
return unescape_html(m) ;
});
fulltxt = fulltxt.replace(/\[&lt;\|.+?\|&gt;\]/g, function (m) {
@ -19950,12 +19984,11 @@ Current version indicated by LITEVER below.
{
insertAIVisionImages = []; //a bit hacky
insertAIAudioSounds = [];
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
// m here means the whole matched string
let inner = m.substring(5, m.length - 5);
let imghash = cyrb_hash(inner);
fulltxt = fulltxt.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
let imghash = inner;
let foundmeta = completed_imgs_meta[imghash];
if (foundmeta != null) {
let data = foundmeta.data;
if(foundmeta.desc && (foundmeta.visionmode==1||foundmeta.visionmode==2))
{
return "\n(Attached Image: " + foundmeta.desc + ")\n";
@ -19963,14 +19996,14 @@ Current version indicated by LITEVER below.
else if(foundmeta.visionmode==3)
{
let placeholder = "";
let parts = inner.split(',');
let parts = data.split(',');
if (parts.length === 2 && parts[0].startsWith('data:image')) {
insertAIVisionImages.push(inner);
insertAIVisionImages.push(data);
placeholder = "\n(Attached Image)\n";
}
else if(parts.length === 2 && parts[0].startsWith('data:audio'))
{
insertAIAudioSounds.push(inner);
insertAIAudioSounds.push(data);
placeholder = "\n(Attached Audio)\n";
}
return placeholder;
@ -19980,7 +20013,7 @@ Current version indicated by LITEVER below.
});
}
fulltxt = fulltxt.replace(/\[<\|p\|.+?\|p\|>\]/g, stripimg_replace_str);
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, stripimg_replace_str);
fulltxt = fulltxt.replace(/\[<\|h\|.+?\|h\|>\]/g, stripimg_replace_str);
//always filter comments - new format
fulltxt = fulltxt.replace(/\[<\|[\s\S]+?\|>\]/g, ""); //remove normal comments too
@ -19992,11 +20025,12 @@ Current version indicated by LITEVER below.
function migrate_old_images_in_gametext()
{
let oldctx = concat_gametext(false, "", "", "", false);
let mustMigrate = false;
//if we have no new images
if (!(/\[<\|p\|.+?\|p\|>\]/.test(oldctx)) && !(/\[<\|d\|.+?\|d\|>\]/.test(oldctx))) {
//but we also have old images
if ((/<\|p\|.+?\|p\|>/.test(oldctx)) || (/<\|d\|.+?\|d\|>/.test(oldctx))) {
mustMigrate = true;
console.log("Migrating old images from saved story");
for (let i = 0; i < gametext_arr.length; ++i) {
gametext_arr[i] = gametext_arr[i].replace(/<\|p\|.+?\|p\|>/g, function (m) {
@ -20008,6 +20042,23 @@ Current version indicated by LITEVER below.
}
}
}
//now, migrate all unhashed inline images into their final placeholder form
//runs when the first migration pass happened, or when oldctx still contains an inline d-tag
if(mustMigrate || (/\[<\|d\|.+?\|d\|>\]/.test(oldctx)))
{
console.log("Migrating old images 2 from saved story");
for (let i = 0; i < gametext_arr.length; ++i) {
//p1 is the payload captured between the d-tag markers
gametext_arr[i] = gametext_arr[i].replace(/\[<\|d\|(.+?)\|d\|>\]/g, function (match, p1) {
let imghash = cyrb_hash(p1);
//create a blank meta entry if this hash has none yet, so the payload has somewhere to live
if(!completed_imgs_meta[imghash])
{
completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0, data: ""};
}
//store the payload on the meta entry (overwriting any previous data) and leave only the hash tag inline
completed_imgs_meta[imghash].data = p1;
return `[<|h|${imghash}|h|>]`;
});
}
}
}
function update_pending_stream_displays()
@ -21417,10 +21468,10 @@ Current version indicated by LITEVER below.
}
//a quick fix that adds a newline if there's none before opponent chat and a picture
var othernamesregexreplace = new RegExp("\\|[d|p]\\|>(?!" + localsettings.chatname + ").+?\\: ", "gi");
var othernamesregexreplace = new RegExp("\\|[h|p]\\|>](?!" + localsettings.chatname + ").+?\\: ", "gi");
input = input.replace(othernamesregexreplace, function (m) {
let rep = m.substring(0,4) + "\n" + m.substring(4);
let rep = m.substring(0,5) + "\n" + m.substring(5);
return rep;
});

View file

@ -512,6 +512,8 @@ struct media_object
std::string b64data = "";
std::vector<media_chunk> mediachunks;
bool is_audio = false; //if true its audio, otherwise its vision
std::vector<int> chunk_start_seq;
std::vector<int> chunk_end_seq;
};
struct speculative_draft_result