updated lite, added better separators for multimodal chunks (universal)

This commit is contained in:
Concedo 2025-07-17 00:11:08 +08:00
parent 2a59adce0f
commit f0564f9caf
3 changed files with 163 additions and 92 deletions

View file

@ -3009,13 +3009,12 @@ int GetThreadsToUse(bool blasmode)
} }
//this function prepares the clip embds for llava. it's only needed when images change //this function prepares the clip embds for llava. it's only needed when images change
static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep, const std::vector<int> & media_intro) static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_intro)
{ {
bool vision_on = (clp_ctx_v != nullptr && clp_img_data != nullptr); bool vision_on = (clp_ctx_v != nullptr && clp_img_data != nullptr);
bool audio_on = (clp_ctx_a != nullptr); bool audio_on = (clp_ctx_a != nullptr);
if (vision_on || audio_on) if (vision_on || audio_on)
{ {
int sepsize = media_sep.size();
int introsize = media_intro.size(); int introsize = media_intro.size();
last_media_mem.clear(); last_media_mem.clear();
@ -3048,7 +3047,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep
int cliptokensneeded = chunk.clp_image_tokens; int cliptokensneeded = chunk.clp_image_tokens;
if(cliptokensneeded>0 && cliptokensneeded < nctx) if(cliptokensneeded>0 && cliptokensneeded < nctx)
{ {
int tokcnt = (i==0?(chunk.clp_image_tokens):(chunk.clp_image_tokens+sepsize)); int tokcnt = (chunk.clp_image_tokens + media_objects[i].chunk_start_seq.size() + media_objects[i].chunk_end_seq.size());
if(i==0) if(i==0)
{ {
tokcnt += introsize; tokcnt += introsize;
@ -3101,7 +3100,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep
int cliptokensneeded = total_chunk_tokens; int cliptokensneeded = total_chunk_tokens;
if(cliptokensneeded>0 && cliptokensneeded < nctx) if(cliptokensneeded>0 && cliptokensneeded < nctx)
{ {
int tokcnt = (i==0?(cliptokensneeded):(cliptokensneeded+sepsize)); int tokcnt = (cliptokensneeded + media_objects[i].chunk_start_seq.size() + media_objects[i].chunk_end_seq.size());
if(i==0) if(i==0)
{ {
tokcnt += introsize; tokcnt += introsize;
@ -3289,6 +3288,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
media_object lv; media_object lv;
lv.b64data = item; lv.b64data = item;
lv.is_audio = false; lv.is_audio = false;
TokenizeString("<image>", lv.chunk_start_seq, file_format, false);
TokenizeString("</image>\n\n", lv.chunk_end_seq, file_format, false);
media_objects.push_back(lv); media_objects.push_back(lv);
new_media_composite += item; new_media_composite += item;
} }
@ -3301,6 +3302,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
media_object lv; media_object lv;
lv.b64data = item; lv.b64data = item;
lv.is_audio = true; lv.is_audio = true;
TokenizeString("<audio>", lv.chunk_start_seq, file_format, false);
TokenizeString("</audio>\n\n", lv.chunk_end_seq, file_format, false);
media_objects.push_back(lv); media_objects.push_back(lv);
new_media_composite += item; new_media_composite += item;
} }
@ -3473,8 +3476,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
// tokenize the prompt // tokenize the prompt
std::vector<int> embd_inp; std::vector<int> embd_inp;
std::vector<int> embd_inp_mem; //for storing added memory std::vector<int> embd_inp_mem; //for storing added memory
std::vector<int> media_sep; //to separate between different llava images std::vector<int> media_intro; //added before media list
std::vector<int> media_intro; //to separate between different llava images
std::vector<int> guidance_embd; //holds the guidance prompt std::vector<int> guidance_embd; //holds the guidance prompt
bool media_embds_built = false; bool media_embds_built = false;
@ -3482,7 +3484,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token); TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL); bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
TokenizeString("\n\n", media_sep, file_format, false);
TokenizeString("\nAttached Media:\n", media_intro, file_format, false); TokenizeString("\nAttached Media:\n", media_intro, file_format, false);
if(media_composite_image_signature=="") if(media_composite_image_signature=="")
@ -3491,7 +3492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
} }
if(media_data_changed) if(media_data_changed)
{ {
PrepareMediaEmbds(nctx, media_sep, media_intro); PrepareMediaEmbds(nctx, media_intro);
media_embds_built = true; media_embds_built = true;
} }
@ -4263,7 +4264,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
{ {
if(!media_embds_built) //this should never happen! however, handle it anyway if(!media_embds_built) //this should never happen! however, handle it anyway
{ {
PrepareMediaEmbds(nctx, media_sep, media_intro); PrepareMediaEmbds(nctx, media_intro);
media_embds_built = true; media_embds_built = true;
printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n"); printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n");
} }
@ -4278,7 +4279,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
//batch is empty, do image processing //batch is empty, do image processing
int llavatokenscounted = 0; int llavatokenscounted = 0;
int llavatokensevaled = 0; int llavatokensevaled = 0;
int sepsize = media_sep.size();
int introsize = media_intro.size(); int introsize = media_intro.size();
while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==MEDIA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==MEDIA_TOKEN_IDENTIFIER_B)) while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==MEDIA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==MEDIA_TOKEN_IDENTIFIER_B))
{ {
@ -4310,10 +4310,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
n_past += introsize; n_past += introsize;
llavatokensevaled += introsize; llavatokensevaled += introsize;
} }
if(sepsize>0 && i>0)
{ int start_size = media_objects[i].chunk_start_seq.size();
if (start_size > 0) {
//add a separator between each image //add a separator between each image
kcpp_embd_batch batch = kcpp_embd_batch(media_sep, n_past, use_mrope, false); kcpp_embd_batch batch = kcpp_embd_batch(media_objects[i].chunk_start_seq, n_past, use_mrope, false);
auto evr = llama_decode(llama_ctx_v4, batch.batch); auto evr = llama_decode(llama_ctx_v4, batch.batch);
if(evr!=0) if(evr!=0)
{ {
@ -4321,10 +4322,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
} }
else else
{ {
printf("\rProcessing Media Separator (%d tokens)",sepsize); printf("\rProcessing Media Start Separator (%d tokens)",start_size);
} }
n_past += sepsize; n_past += start_size;
llavatokensevaled += sepsize; llavatokensevaled += start_size;
} }
for(int j=0;j<media_objects[i].mediachunks.size();++j) for(int j=0;j<media_objects[i].mediachunks.size();++j)
@ -4348,6 +4349,23 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
return output; return output;
} }
} }
int end_size = media_objects[i].chunk_end_seq.size();
if (end_size > 0) {
//add the closing separator after this media item's chunks
kcpp_embd_batch batch = kcpp_embd_batch(media_objects[i].chunk_end_seq, n_past, use_mrope, false);
auto evr = llama_decode(llama_ctx_v4, batch.batch);
if(evr!=0)
{
printf("\nError when appending media separator: %d\n",evr);
}
else
{
printf("\rProcessing Media End Separator (%d tokens)",end_size);
}
n_past += end_size;
llavatokensevaled += end_size;
}
} }
if(llavatokenscounted!=llavatokensevaled) if(llavatokenscounted!=llavatokensevaled)
{ {

View file

@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
--> -->
<script id="init-config"> <script id="init-config">
const LITEVER = 262; const LITEVER = 263;
const urlParams = new URLSearchParams(window.location.search); const urlParams = new URLSearchParams(window.location.search);
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_"; const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@ -3146,7 +3146,7 @@ Current version indicated by LITEVER below.
var selected_models = []; //this stores ALL selected models properties as array of objects var selected_models = []; //this stores ALL selected models properties as array of objects
var worker_data = []; var worker_data = [];
var selected_workers = []; var selected_workers = [];
//gametext_arr stores images inline, with the special format [<|p|id|p|>] or [<|d|id|d|>], which is either an ID for loaded image data, or an ID for pending requests //gametext_arr stores images inline, with the special format [<|p|id|p|>] or [<|h|hash|h|>], which is respectively an ID for a pending request, or a hash for loaded media data
var gametext_arr = []; //array of texts currently displayed var gametext_arr = []; //array of texts currently displayed
var redo_arr = []; //array of texts that are in the redo stack var redo_arr = []; //array of texts that are in the redo stack
var retry_prev_text = []; //when we retry, save the last 3 versions in case they want to undo var retry_prev_text = []; //when we retry, save the last 3 versions in case they want to undo
@ -3181,7 +3181,6 @@ Current version indicated by LITEVER below.
var image_db = {}; //stores a dictionary of pending images var image_db = {}; //stores a dictionary of pending images
var interrogation_db = {}; var interrogation_db = {};
var completed_imgs_meta = {}; //stores temp info on completed images like alt text var completed_imgs_meta = {}; //stores temp info on completed images like alt text
var img_hash_to_b64_lookup = {}; //used to revert imghash to b64. temporary storage
var data_hash_to_blob_lookup = {}; //used for temporary blob storage, such as with embedded audio var data_hash_to_blob_lookup = {}; //used for temporary blob storage, such as with embedded audio
//key is ID, body is {done:false,queue:10,result:""} //key is ID, body is {done:false,queue:10,result:""}
var stablemodels = [{"name": "stable_diffusion","count": 1}]; //stored as {name,count} var stablemodels = [{"name": "stable_diffusion","count": 1}]; //stored as {name,count}
@ -7454,7 +7453,7 @@ Current version indicated by LITEVER below.
let export_arr_no_img = []; let export_arr_no_img = [];
let export_hashes = {}; let export_hashes = {};
for (let i = 0; i < gametext_arr.length; ++i) { for (let i = 0; i < gametext_arr.length; ++i) {
export_arr_no_img.push(gametext_arr[i].replace(/\[<\|p\|.+?\|p\|>\]/g, "").replace(/\[<\|d\|.+?\|d\|>\]/g, "")); export_arr_no_img.push(gametext_arr[i].replace(/\[<\|p\|.+?\|p\|>\]/g, "").replace(/\[<\|h\|.+?\|h\|>\]/g, ""));
} }
if(!save_images) if(!save_images)
{ {
@ -7464,12 +7463,11 @@ Current version indicated by LITEVER below.
{ {
//bake used image metas into savefile //bake used image metas into savefile
for (let i = 0; i < gametext_arr.length; ++i) { for (let i = 0; i < gametext_arr.length; ++i) {
let matches = gametext_arr[i].match(/\[<\|d\|.+?\|d\|>\]/g); let matches = gametext_arr[i].matchAll(/\[<\|h\|(.+?)\|h\|>\]/g);
for(let m in matches) for (const match of matches) {
{ let imghash = match[1];
let inner = matches[m].substring(5, matches[m].length - 5); if (completed_imgs_meta[imghash] != null)
let imghash = cyrb_hash(inner); {
if (completed_imgs_meta[imghash] != null) {
export_hashes[imghash] = completed_imgs_meta[imghash]; export_hashes[imghash] = completed_imgs_meta[imghash];
} }
} }
@ -8028,7 +8026,16 @@ Current version indicated by LITEVER below.
{ {
for (var key in storyobj.completed_imgs_meta) for (var key in storyobj.completed_imgs_meta)
{ {
let oldb64 = "";
if(completed_imgs_meta[key] && completed_imgs_meta[key].data)
{
oldb64 = completed_imgs_meta[key].data;
}
completed_imgs_meta[key] = storyobj.completed_imgs_meta[key]; completed_imgs_meta[key] = storyobj.completed_imgs_meta[key];
if(completed_imgs_meta[key] && !completed_imgs_meta[key].data && oldb64)
{
completed_imgs_meta[key].data = oldb64;
}
if(completed_imgs_meta[key] && completed_imgs_meta[key].visionmode==4) if(completed_imgs_meta[key] && completed_imgs_meta[key].visionmode==4)
{ {
completed_imgs_meta[key].visionmode = 3; //todo: temporary backwards compat, to be removed. completed_imgs_meta[key].visionmode = 3; //todo: temporary backwards compat, to be removed.
@ -10261,7 +10268,7 @@ Current version indicated by LITEVER below.
{ {
document.getElementById("oairoledropdown").value = localsettings.saved_oai_role; document.getElementById("oairoledropdown").value = localsettings.saved_oai_role;
} }
if(document.getElementById("customapidropdown").value==7) //mistral api supports prefill if(document.getElementById("customapidropdown").value==7 || (document.getElementById("customapidropdown").value==2 && document.getElementById("custom_oai_endpoint").value.includes(".moonshot."))) //mistral and moonshot apis support prefill
{ {
document.getElementById("oaiemulatecompletionsbox").classList.remove("hidden"); document.getElementById("oaiemulatecompletionsbox").classList.remove("hidden");
} }
@ -10386,9 +10393,11 @@ Current version indicated by LITEVER below.
|| dropdown.value.includes("text-davinci-001") || dropdown.value.includes("gpt-3.5-turbo-instruct") || dropdown.value == "davinci"); || dropdown.value.includes("text-davinci-001") || dropdown.value.includes("gpt-3.5-turbo-instruct") || dropdown.value == "davinci");
if(autotoggle_check) if(autotoggle_check)
{ {
document.getElementById("useoaichatcompl").disabled = false;
if(ep_should_always_use_chat_completions() || dropdown.selectedIndex==dropdown.options.length-1) if(ep_should_always_use_chat_completions() || dropdown.selectedIndex==dropdown.options.length-1)
{ {
document.getElementById("useoaichatcompl").checked = true; document.getElementById("useoaichatcompl").checked = true;
document.getElementById("useoaichatcompl").disabled = true;
} else if (document.getElementById("custom_oai_endpoint").value.toLowerCase().includes("featherless.ai")) { } else if (document.getElementById("custom_oai_endpoint").value.toLowerCase().includes("featherless.ai")) {
document.getElementById("useoaichatcompl").checked = false; //use completions for a better experience document.getElementById("useoaichatcompl").checked = false; //use completions for a better experience
} else { } else {
@ -10564,10 +10573,10 @@ Current version indicated by LITEVER below.
if(desired_oai_key!="" && desired_oai_key!=dummy_api_key){ if(desired_oai_key!="" && desired_oai_key!=dummy_api_key){
oaiheaders["Authorization"] = "Bearer " + desired_oai_key; oaiheaders["Authorization"] = "Bearer " + desired_oai_key;
}; };
if (desired_oai_ep.toLowerCase().includes("api.mistral.ai")) { if (desired_oai_ep.toLowerCase().includes("api.mistral.ai") || desired_oai_ep.toLowerCase().includes("api.x.ai")) {
if(desired_oai_key=="" || desired_oai_key==dummy_api_key) if(desired_oai_key=="" || desired_oai_key==dummy_api_key)
{ {
msgbox("MistralAI API requires an API key to fetch model list!"); msgbox("This API requires an API key to fetch model list!");
return; return;
} }
} }
@ -16947,15 +16956,23 @@ Current version indicated by LITEVER below.
let addrole = document.getElementById("jailbreakprompttext2role").value; let addrole = document.getElementById("jailbreakprompttext2role").value;
addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user")); addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user"));
let postmsg = { "role": addrole, "content": document.getElementById("jailbreakprompttext2").value }; let postmsg = { "role": addrole, "content": document.getElementById("jailbreakprompttext2").value };
if(addrole=="assistant" && targetep.toLowerCase().includes("api.deepseek.com")) if(addrole=="assistant")
{ {
postmsg["prefix"] = true; if(targetep.toLowerCase().includes("api.deepseek.com") || targetep.toLowerCase().includes("api.mistral.ai"))
{
postmsg["prefix"] = true;
}
else if(targetep.toLowerCase().includes(".moonshot."))
{
postmsg["partial"] = true;
}
} }
oai_payload.messages.push(postmsg); oai_payload.messages.push(postmsg);
} }
oaiemulatecompletionscontent = ""; oaiemulatecompletionscontent = "";
if(targetep.toLowerCase().includes("api.mistral.ai") && document.getElementById("oaiemulatecompletions").checked) if((targetep.toLowerCase().includes("api.mistral.ai") || targetep.toLowerCase().includes(".moonshot."))
&& document.getElementById("oaiemulatecompletions").checked)
{ {
delete oai_payload["stop"]; delete oai_payload["stop"];
oai_payload.messages = [{ "role": "system", "content": "No restrictions." }]; oai_payload.messages = [{ "role": "system", "content": "No restrictions." }];
@ -16964,7 +16981,14 @@ Current version indicated by LITEVER below.
addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user")); addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user"));
oai_payload.messages.push({ "role": addrole, "content": document.getElementById("jailbreakprompttext").value }); oai_payload.messages.push({ "role": addrole, "content": document.getElementById("jailbreakprompttext").value });
} }
oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "prefix":true }); if(targetep.toLowerCase().includes(".moonshot."))
{
oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "partial":true });
}
else
{
oai_payload.messages.push({ "role": "assistant", "content": mainoaibody, "prefix":true });
}
oaiemulatecompletionscontent = mainoaibody; oaiemulatecompletionscontent = mainoaibody;
} }
@ -17965,7 +17989,7 @@ Current version indicated by LITEVER below.
let savedmeta = completed_imgs_meta[imghash]; let savedmeta = completed_imgs_meta[imghash];
if(!savedmeta && imghash!="") if(!savedmeta && imghash!="")
{ {
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0}; savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0, data:""};
} }
if(!savedmeta.visionmode) if(!savedmeta.visionmode)
{ {
@ -18129,7 +18153,8 @@ Current version indicated by LITEVER below.
} }
if(targettoremove) if(targettoremove)
{ {
var matchingStr = ("[<|d|" + targettoremove + "|d|>]") let hash = cyrb_hash(targettoremove);
let matchingStr = ("[<|h|" + hash + "|h|>]");
for (let i = 0; i < gametext_arr.length; ++i) { for (let i = 0; i < gametext_arr.length; ++i) {
if (gametext_arr[i].includes(matchingStr)) { if (gametext_arr[i].includes(matchingStr)) {
gametext_arr[i] = gametext_arr[i].replace(matchingStr, ""); gametext_arr[i] = gametext_arr[i].replace(matchingStr, "");
@ -18150,15 +18175,12 @@ Current version indicated by LITEVER below.
{ {
siclass = "storyimgsidehorizontal"; //horizontal stack siclass = "storyimgsidehorizontal"; //horizontal stack
} }
text = text.replace(/\[<\|p\|.+?\|p\|>\]/g, function (m) { text = text.replace(/\[<\|p\|(.+?)\|p\|>\]/g, function (_match, inner) {
// m here means the whole matched string
let inner = m.substring(5, m.length - 5);
inner = render_media_html("", inner, siclass); inner = render_media_html("", inner, siclass);
return inner; return inner;
}); });
text = text.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
// m here means the whole matched string text = text.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
let inner = m.substring(5, m.length - 5);
inner = render_media_html(inner, "", siclass); inner = render_media_html(inner, "", siclass);
return inner; return inner;
}); });
@ -18166,22 +18188,31 @@ Current version indicated by LITEVER below.
return text; return text;
} }
function render_media_html(data, pend_txt = "", siclass="storyimgfloat") function render_media_html(hash, pend_txt = "", siclass="storyimgfloat")
{ {
//if it's a meta reference, retrieve actual data
let data = "";
if(hash!="")
{
if(completed_imgs_meta[hash] != null && completed_imgs_meta[hash].data)
{
data = completed_imgs_meta[hash].data;
}
}
if(data.startsWith("data:audio")) if(data.startsWith("data:audio"))
{ {
return render_audio_html(data); return render_audio_html(hash, data);
} }
else //also handles ALL pending items else //also handles ALL pending items
{ {
return render_image_html(data, pend_txt, siclass); return render_image_html(hash, data, pend_txt, siclass);
} }
return ""; return "";
} }
function render_audio_html(data) function render_audio_html(hash, data)
{ {
let audiohash = cyrb_hash(data).trim(); let audiohash = hash.trim();
let audioblob = b64_to_persistent_blob(data,audiohash); let audioblob = b64_to_persistent_blob(data,audiohash);
let filename = ""; let filename = "";
let len = 0; let len = 0;
@ -18195,7 +18226,7 @@ Current version indicated by LITEVER below.
return str; return str;
} }
function render_image_html(data, pend_txt = "", siclass="storyimgfloat") { function render_image_html(hash, data, pend_txt = "", siclass="storyimgfloat") {
var dim = PREVIEW_RES_PX; //image preview. adventure mode has smaller pictures var dim = PREVIEW_RES_PX; //image preview. adventure mode has smaller pictures
dimW = dim; dimW = dim;
dimH = dim; dimH = dim;
@ -18213,7 +18244,7 @@ Current version indicated by LITEVER below.
return `<div class="${siclass}${reinvertcolor}" contenteditable="false"><img src="" width=${dim} height=${dim} style="border-radius: 6%;" title="${alttxt}" alt="${pend_txt}"><div class=\"imgloader\"></div><div class=\"imagelabel\">${waittime}</div></div>`; return `<div class="${siclass}${reinvertcolor}" contenteditable="false"><img src="" width=${dim} height=${dim} style="border-radius: 6%;" title="${alttxt}" alt="${pend_txt}"><div class=\"imgloader\"></div><div class=\"imagelabel\">${waittime}</div></div>`;
} else { } else {
let imghash = cyrb_hash(data).trim(); let imghash = hash.trim();
if (completed_imgs_meta[imghash] != null) { if (completed_imgs_meta[imghash] != null) {
alttxt = completed_imgs_meta[imghash].prompt?escape_html(completed_imgs_meta[imghash].prompt):""; alttxt = completed_imgs_meta[imghash].prompt?escape_html(completed_imgs_meta[imghash].prompt):"";
if(completed_imgs_meta[imghash].aspect==1) //portrait if(completed_imgs_meta[imghash].aspect==1) //portrait
@ -18568,7 +18599,7 @@ Current version indicated by LITEVER below.
{ {
const pat = /<t2i>(.*?)<\/t2i>/g; const pat = /<t2i>(.*?)<\/t2i>/g;
gentxtspeak = gentxtspeak.replace(pat, ""); gentxtspeak = gentxtspeak.replace(pat, "");
const pat2 = /{{\[IMG_.{1,8}_REF\]}}/g; const pat2 = /{{\[DAT_.{1,8}_REF\]}}/g;
gentxtspeak = gentxtspeak.replace(pat2, ""); gentxtspeak = gentxtspeak.replace(pat2, "");
} }
@ -18776,13 +18807,13 @@ Current version indicated by LITEVER below.
hasChangedImage = true; //set here to update timers hasChangedImage = true; //set here to update timers
if (img.done == true && img.result != "") { if (img.done == true && img.result != "") {
needToSave = true; needToSave = true;
let newstr = "[<|d|" + img.result + "|d|>]"; let metaid = cyrb_hash(img.result);
let newstr = `[<|h|${metaid}|h|>]`;
console.log("Replacing with Image: " + matchstr); console.log("Replacing with Image: " + matchstr);
gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr); gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
let metaid = cyrb_hash(img.result);
//default to llava if supported, and image is self uploaded //default to llava if supported, and image is self uploaded
let desiredvismode = ((image_db[key].imsource==1 && ((is_using_kcpp_with_vision() && image_db[key].type==0) || (is_using_kcpp_with_audio() && image_db[key].type==1)))?3:0); let desiredvismode = ((image_db[key].imsource==1 && ((is_using_kcpp_with_vision() && image_db[key].type==0) || (is_using_kcpp_with_audio() && image_db[key].type==1)))?3:0);
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:desiredvismode, aspect:image_db[key].aspect, ref:image_db[key].imrefid, len:image_db[key].len, type:image_db[key].type}; completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:desiredvismode, aspect:image_db[key].aspect, ref:image_db[key].imrefid, len:image_db[key].len, type:image_db[key].type, data:img.result};
delete image_db[key]; delete image_db[key];
} }
} }
@ -19465,6 +19496,17 @@ Current version indicated by LITEVER below.
for (var i = 0; i < tmpstory.actions.length; ++i) { for (var i = 0; i < tmpstory.actions.length; ++i) {
gametext_arr.push(tmpstory.actions[i]); gametext_arr.push(tmpstory.actions[i]);
} }
//handle updated image metadata
if(tmpstory.completed_imgs_meta)
{
for (var key in tmpstory.completed_imgs_meta)
{
if(!completed_imgs_meta[key])
{
completed_imgs_meta[key] = tmpstory.completed_imgs_meta[key];
}
}
}
render_gametext(false); render_gametext(false);
} }
else else
@ -19749,29 +19791,25 @@ Current version indicated by LITEVER below.
} }
return `<span class=\"color_pink\">`+m+`</span>`; return `<span class=\"color_pink\">`+m+`</span>`;
}); });
text = text.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) { text = text.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
let inner = m.substring(5, m.length - 5); let hashtag = `{{[DAT_${inner}_REF]}}`;
let imghash = cyrb_hash(inner); if (!addspan) {
img_hash_to_b64_lookup[imghash] = m;
let hashtag = `{{[IMG_${imghash}_REF]}}`;
if(!addspan)
{
return hashtag; return hashtag;
} }
return `<span class=\"color_pink\">${hashtag}</span>`; return `<span class="color_pink">${hashtag}</span>`;
}); });
return text; return text;
} }
function unstash_image_placeholders(text) function unstash_image_placeholders(text)
{ {
return text.replace(/{{\[IMG_.{1,8}_REF\]}}/g, function (m) { return text.replace(/{{\[DAT_.{1,8}_REF\]}}/g, function (m) {
let imghash = m.substring(7, m.length - 7); let imghash = m.substring(7, m.length - 7);
if(!imghash) if(!imghash)
{ {
return m; return m;
} }
let unstash = img_hash_to_b64_lookup[imghash]; let unstash = `[<|h|${imghash}|h|>]`;
if(!unstash) if(!unstash)
{ {
return m; return m;
@ -19795,18 +19833,7 @@ Current version indicated by LITEVER below.
retry_preserve_last = false; retry_preserve_last = false;
redo_prev_text = []; redo_prev_text = [];
//stash images //replace b64 image placeholders back to the actual stored format
gametext_elem.querySelectorAll('div.storyimgcenter,div.storyimgsidevertical,div.storyimgsidehorizontal,div.storyimgfloat').forEach(
(el) => {
let chimg = el.getElementsByTagName("img")[0];
if(el && chimg)
{
el.replaceWith((chimg.alt == null || chimg.alt == "") ? ("[<|d|" + chimg.src + "|d|>]") : ("[<|p|" + chimg.alt + "|p|>]"))
}
}
);
//replace b64 image placeholders
gametext_elem.innerHTML = unstash_image_placeholders(gametext_elem.innerHTML); gametext_elem.innerHTML = unstash_image_placeholders(gametext_elem.innerHTML);
let editedChunks = []; //use to count chunk lengths before merging let editedChunks = []; //use to count chunk lengths before merging
@ -19816,7 +19843,6 @@ Current version indicated by LITEVER below.
} }
); );
//strip chunks (optimize for firefox by not constantly modifying dom) //strip chunks (optimize for firefox by not constantly modifying dom)
let htmlstr = gametext_elem.innerHTML; let htmlstr = gametext_elem.innerHTML;
htmlstr = htmlstr.replace(/<span class="(.+?)">(.+?)<\/span>/g, "$2"); htmlstr = htmlstr.replace(/<span class="(.+?)">(.+?)<\/span>/g, "$2");
@ -19829,11 +19855,6 @@ Current version indicated by LITEVER below.
//rather than dump it all into one history, let's split it into paragraphs //rather than dump it all into one history, let's split it into paragraphs
let fullmergedstory = gametext_elem.innerText; let fullmergedstory = gametext_elem.innerText;
//if it ends with a single newline, remove it to avoid ghost newlines
if (fullmergedstory.endsWith("\n") && !fullmergedstory.endsWith("\n\n")) {
fullmergedstory = fullmergedstory.slice(0, -1);
}
let newestChunk = ""; let newestChunk = "";
if(editedChunks.length>1) //split by chunk lengths in reverse order, we only want the newest if(editedChunks.length>1) //split by chunk lengths in reverse order, we only want the newest
{ {
@ -19846,6 +19867,19 @@ Current version indicated by LITEVER below.
} }
} }
//if it ends with a single newline, remove it to avoid ghost newlines
if (newestChunk) {
if (newestChunk.endsWith("\n") && !newestChunk.endsWith("\n\n")) {
newestChunk = newestChunk.slice(0, -1);
}
}
else
{
if (fullmergedstory.endsWith("\n") && !fullmergedstory.endsWith("\n\n")) {
fullmergedstory = fullmergedstory.slice(0, -1);
}
}
//split by newlines for the rest //split by newlines for the rest
if(fullmergedstory.length>0) if(fullmergedstory.length>0)
{ {
@ -19906,7 +19940,7 @@ Current version indicated by LITEVER below.
fulltxt = fulltxt.replace(/\[&lt;\|p\|.+?\|p\|&gt;\]/g, function (m) { fulltxt = fulltxt.replace(/\[&lt;\|p\|.+?\|p\|&gt;\]/g, function (m) {
return unescape_html(m); return unescape_html(m);
}); });
fulltxt = fulltxt.replace(/\[&lt;\|d\|.+?\|d\|&gt;\]/g, function (m) { fulltxt = fulltxt.replace(/\[&lt;\|h\|.+?\|h\|&gt;\]/g, function (m) {
return unescape_html(m) ; return unescape_html(m) ;
}); });
fulltxt = fulltxt.replace(/\[&lt;\|.+?\|&gt;\]/g, function (m) { fulltxt = fulltxt.replace(/\[&lt;\|.+?\|&gt;\]/g, function (m) {
@ -19950,12 +19984,11 @@ Current version indicated by LITEVER below.
{ {
insertAIVisionImages = []; //a bit hacky insertAIVisionImages = []; //a bit hacky
insertAIAudioSounds = []; insertAIAudioSounds = [];
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) { fulltxt = fulltxt.replace(/\[<\|h\|(.+?)\|h\|>\]/g, function (_match, inner) {
// m here means the whole matched string let imghash = inner;
let inner = m.substring(5, m.length - 5);
let imghash = cyrb_hash(inner);
let foundmeta = completed_imgs_meta[imghash]; let foundmeta = completed_imgs_meta[imghash];
if (foundmeta != null) { if (foundmeta != null) {
let data = foundmeta.data;
if(foundmeta.desc && (foundmeta.visionmode==1||foundmeta.visionmode==2)) if(foundmeta.desc && (foundmeta.visionmode==1||foundmeta.visionmode==2))
{ {
return "\n(Attached Image: " + foundmeta.desc + ")\n"; return "\n(Attached Image: " + foundmeta.desc + ")\n";
@ -19963,14 +19996,14 @@ Current version indicated by LITEVER below.
else if(foundmeta.visionmode==3) else if(foundmeta.visionmode==3)
{ {
let placeholder = ""; let placeholder = "";
let parts = inner.split(','); let parts = data.split(',');
if (parts.length === 2 && parts[0].startsWith('data:image')) { if (parts.length === 2 && parts[0].startsWith('data:image')) {
insertAIVisionImages.push(inner); insertAIVisionImages.push(data);
placeholder = "\n(Attached Image)\n"; placeholder = "\n(Attached Image)\n";
} }
else if(parts.length === 2 && parts[0].startsWith('data:audio')) else if(parts.length === 2 && parts[0].startsWith('data:audio'))
{ {
insertAIAudioSounds.push(inner); insertAIAudioSounds.push(data);
placeholder = "\n(Attached Audio)\n"; placeholder = "\n(Attached Audio)\n";
} }
return placeholder; return placeholder;
@ -19980,7 +20013,7 @@ Current version indicated by LITEVER below.
}); });
} }
fulltxt = fulltxt.replace(/\[<\|p\|.+?\|p\|>\]/g, stripimg_replace_str); fulltxt = fulltxt.replace(/\[<\|p\|.+?\|p\|>\]/g, stripimg_replace_str);
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, stripimg_replace_str); fulltxt = fulltxt.replace(/\[<\|h\|.+?\|h\|>\]/g, stripimg_replace_str);
//always filter comments - new format //always filter comments - new format
fulltxt = fulltxt.replace(/\[<\|[\s\S]+?\|>\]/g, ""); //remove normal comments too fulltxt = fulltxt.replace(/\[<\|[\s\S]+?\|>\]/g, ""); //remove normal comments too
@ -19992,11 +20025,12 @@ Current version indicated by LITEVER below.
function migrate_old_images_in_gametext() function migrate_old_images_in_gametext()
{ {
let oldctx = concat_gametext(false, "", "", "", false); let oldctx = concat_gametext(false, "", "", "", false);
let mustMigrate = false;
//if we have no new images //if we have no new images
if (!(/\[<\|p\|.+?\|p\|>\]/.test(oldctx)) && !(/\[<\|d\|.+?\|d\|>\]/.test(oldctx))) { if (!(/\[<\|p\|.+?\|p\|>\]/.test(oldctx)) && !(/\[<\|d\|.+?\|d\|>\]/.test(oldctx))) {
//but we also have old images //but we also have old images
if ((/<\|p\|.+?\|p\|>/.test(oldctx)) || (/<\|d\|.+?\|d\|>/.test(oldctx))) { if ((/<\|p\|.+?\|p\|>/.test(oldctx)) || (/<\|d\|.+?\|d\|>/.test(oldctx))) {
mustMigrate = true;
console.log("Migrating old images from saved story"); console.log("Migrating old images from saved story");
for (let i = 0; i < gametext_arr.length; ++i) { for (let i = 0; i < gametext_arr.length; ++i) {
gametext_arr[i] = gametext_arr[i].replace(/<\|p\|.+?\|p\|>/g, function (m) { gametext_arr[i] = gametext_arr[i].replace(/<\|p\|.+?\|p\|>/g, function (m) {
@ -20008,6 +20042,23 @@ Current version indicated by LITEVER below.
} }
} }
} }
//now, migrate all unhashed inline images into their final placeholder form
if(mustMigrate || (/\[<\|d\|.+?\|d\|>\]/.test(oldctx)))
{
console.log("Migrating old images 2 from saved story");
for (let i = 0; i < gametext_arr.length; ++i) {
gametext_arr[i] = gametext_arr[i].replace(/\[<\|d\|(.+?)\|d\|>\]/g, function (match, p1) {
let imghash = cyrb_hash(p1);
if(!completed_imgs_meta[imghash])
{
completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0, data: ""};
}
completed_imgs_meta[imghash].data = p1;
return `[<|h|${imghash}|h|>]`;
});
}
}
} }
function update_pending_stream_displays() function update_pending_stream_displays()
@ -21417,10 +21468,10 @@ Current version indicated by LITEVER below.
} }
//a quick fix that adds a newline if there's none before opponent chat and a picture //a quick fix that adds a newline if there's none before opponent chat and a picture
var othernamesregexreplace = new RegExp("\\|[d|p]\\|>(?!" + localsettings.chatname + ").+?\\: ", "gi"); var othernamesregexreplace = new RegExp("\\|[h|p]\\|>](?!" + localsettings.chatname + ").+?\\: ", "gi");
input = input.replace(othernamesregexreplace, function (m) { input = input.replace(othernamesregexreplace, function (m) {
let rep = m.substring(0,4) + "\n" + m.substring(4); let rep = m.substring(0,5) + "\n" + m.substring(5);
return rep; return rep;
}); });

View file

@ -512,6 +512,8 @@ struct media_object
std::string b64data = ""; std::string b64data = "";
std::vector<media_chunk> mediachunks; std::vector<media_chunk> mediachunks;
bool is_audio = false; //if true its audio, otherwise its vision bool is_audio = false; //if true its audio, otherwise its vision
std::vector<int> chunk_start_seq;
std::vector<int> chunk_end_seq;
}; };
struct speculative_draft_result struct speculative_draft_result