llava support is now fully functioning

This commit is contained in:
Concedo 2024-03-11 15:55:32 +08:00
parent d943c739a8
commit 484d90c330
2 changed files with 193 additions and 47 deletions

View file

@ -33,6 +33,11 @@
#include "examples/llava/clip.h" #include "examples/llava/clip.h"
#include "examples/llava/llava.h" #include "examples/llava/llava.h"
//constants
//NOTE(review): the name suggests spare context reserved to cope with fragmentation; where it is consumed is not visible here - confirm
const int extra_context_handle_fragmentation = 80;
//placeholder token ids that stand in for llava image embeddings inside the prompt token stream.
//they are pushed as dummy tokens (one per clip image token) and recognised again when the prompt is fed for evaluation.
//two values exist so the active one can be swapped when the submitted images change, to force reprocessing
const int LLAVA_TOKEN_IDENTIFIER_A = -998; //alternate between both, changing when image changes
const int LLAVA_TOKEN_IDENTIFIER_B = -999;
//shared //shared
std::string executable_path = ""; std::string executable_path = "";
std::string lora_filename = ""; std::string lora_filename = "";
@ -80,6 +85,8 @@ static llama_context * llama_ctx_v4;
static clip_ctx * clp_ctx = nullptr; //for llava static clip_ctx * clp_ctx = nullptr; //for llava
static clip_image_u8 * clp_img_data = nullptr; //most recent image static clip_image_u8 * clp_img_data = nullptr; //most recent image
static std::vector<llava_image> llava_images; static std::vector<llava_image> llava_images;
//concatenation of the image data strings from the most recent request. each generate call rebuilds this
//and compares it with the stored value; a mismatch means the images changed. it is also reset to ""
//when evaluating an image fails, so the next request is treated as changed
static std::string llava_composite_image_signature = ""; //for identifying when the llava images change, we need to invalidate the cache
//placeholder token id currently used for llava dummy tokens; toggled between
//LLAVA_TOKEN_IDENTIFIER_A and LLAVA_TOKEN_IDENTIFIER_B whenever the image signature changes
static int current_llava_identifier = LLAVA_TOKEN_IDENTIFIER_A;
static gpt_params * kcpp_params = nullptr; static gpt_params * kcpp_params = nullptr;
static int max_context_limit_at_load = 0; static int max_context_limit_at_load = 0;
@ -105,8 +112,6 @@ static std::string concat_output_reader_copy_poll = ""; //for streaming
static std::string concat_output_reader_copy_res = ""; //for gen response static std::string concat_output_reader_copy_res = ""; //for gen response
static std::vector<logit_bias> logit_biases; static std::vector<logit_bias> logit_biases;
const int extra_context_handle_fragmentation = 80;
inline bool IsNanCheck(float f) inline bool IsNanCheck(float f)
{ {
const unsigned int u = *(unsigned int*)&f; const unsigned int u = *(unsigned int*)&f;
@ -1080,7 +1085,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
} }
} }
if(mmproj_filename != "") if(mmproj_filename != "" && file_format==FileFormat::GGUF_GENERIC)
{ {
printf("\nAttempting to apply Multimodal Projector: %s\n", mmproj_filename.c_str()); printf("\nAttempting to apply Multimodal Projector: %s\n", mmproj_filename.c_str());
clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1); clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1);
@ -1593,6 +1598,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
} }
} }
llava_images.clear(); llava_images.clear();
std::string new_llava_composite = "";
for(int x=0;x<images_max;++x) for(int x=0;x<images_max;++x)
{ {
std::string item = inputs.images[x]; std::string item = inputs.images[x];
@ -1601,6 +1607,17 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
llava_image lv; llava_image lv;
lv.b64data = item; lv.b64data = item;
llava_images.push_back(lv); llava_images.push_back(lv);
new_llava_composite += item;
}
}
if(llava_composite_image_signature!=new_llava_composite)
{
//images have changed. swap identifiers to force reprocessing
current_llava_identifier = (current_llava_identifier==LLAVA_TOKEN_IDENTIFIER_A?LLAVA_TOKEN_IDENTIFIER_B:LLAVA_TOKEN_IDENTIFIER_A);
llava_composite_image_signature = new_llava_composite;
if(debugmode==1)
{
printf("\nLLAVA images changed, existing cache invalidated");
} }
} }
@ -1667,6 +1684,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
// tokenize the prompt // tokenize the prompt
std::vector<int> embd_inp; std::vector<int> embd_inp;
std::vector<int> embd_inp_mem; //for storing added memory std::vector<int> embd_inp_mem; //for storing added memory
std::vector<int> llava_mem; //for storing dummy tokens that will be consumed by llava
int32_t nctx = kcpp_params->n_ctx;
TokenizeString(kcpp_params->prompt, embd_inp, file_format); TokenizeString(kcpp_params->prompt, embd_inp, file_format);
if(clp_ctx!=nullptr && clp_img_data!=nullptr) if(clp_ctx!=nullptr && clp_img_data!=nullptr)
@ -1686,8 +1707,21 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_params->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) { if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_params->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) {
printf("\nError: Clip image %d failed to create embd!",i); printf("\nError: Clip image %d failed to create embd!",i);
} }
if(debugmode==1)
{
printf("\nLLAVA Clip Embed %i used Tokens: %d",i,llava_images[i].clp_image_tokens); printf("\nLLAVA Clip Embed %i used Tokens: %d",i,llava_images[i].clp_image_tokens);
} }
if(llava_images[i].clp_image_tokens>0 && llava_images[i].clp_image_tokens < nctx)
{
for(int n=0;n<llava_images[i].clp_image_tokens;++n)
{
llava_mem.push_back(current_llava_identifier);
}
}else
{
printf("\nWarning: LLAVA Image excluded - Context size too low or not enough clip tokens!\n");
}
}
} }
} }
@ -1697,8 +1731,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
} }
//truncate to front of the prompt if its too long //truncate to front of the prompt if its too long
int32_t nctx = kcpp_params->n_ctx;
if (embd_inp.size() + kcpp_params->n_predict > nctx) if (embd_inp.size() + kcpp_params->n_predict > nctx)
{ {
//get bos token //get bos token
@ -1713,8 +1745,43 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
} }
} }
if(llava_mem.size()>0) //stick the llava mem before the added mem
{
if(llava_mem.size() + kcpp_params->n_predict + 4 > nctx)
{
printf("\nWarning: Too many LLaVA tokens, max context exceeded! They will be ignored!\n");
}
else
{
std::vector<int> bos;
TokenizeString("", bos, file_format);
if(embd_inp_mem.size()>0) //remove existing bos if exists
{
if (bos.size()>0 && !embd_inp_mem.empty() && bos[0]==embd_inp_mem[0]) {
embd_inp_mem.erase(embd_inp_mem.begin());
}
}
//append llava dummy tokens
embd_inp_mem.insert(embd_inp_mem.begin(), llava_mem.begin(), llava_mem.end());
if (bos.size() > 0 && embd_inp_mem.size() > 0)
{
embd_inp_mem.insert(embd_inp_mem.begin(), bos[0]); //insert bos at front
}
//shorten memory if needed
if (embd_inp_mem.size() + kcpp_params->n_predict + 4 > nctx)
{
int limit = nctx - (kcpp_params->n_predict + 4);
if (embd_inp_mem.size() > limit) {
embd_inp_mem.resize(limit);
}
}
}
}
//added special memory, overwrite if needed //added special memory, overwrite if needed
if(addedmemory!="") if(embd_inp_mem.size()>0)
{ {
//remove bos token from prompt, it'll be taken from memory //remove bos token from prompt, it'll be taken from memory
std::vector<int> bos; std::vector<int> bos;
@ -1750,7 +1817,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
//stick memory to front of prompt //stick memory to front of prompt
embd_inp.insert(embd_inp.begin(), embd_inp_mem.begin(), embd_inp_mem.end()); embd_inp.insert(embd_inp.begin(), embd_inp_mem.begin(), embd_inp_mem.end());
} }
//determine how much npast we have to rewind from the current state //determine how much npast we have to rewind from the current state
@ -2148,16 +2214,70 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
// some user input remains from prompt or interaction, forward it to processing // some user input remains from prompt or interaction, forward it to processing
while ((int)embd_inp.size() > input_consumed) while ((int)embd_inp.size() > input_consumed)
{ {
embd.push_back(embd_inp[input_consumed]); int currtoken = embd_inp[input_consumed];
if(currtoken==LLAVA_TOKEN_IDENTIFIER_A || currtoken==LLAVA_TOKEN_IDENTIFIER_B) //special llava token hit
{
//if partial batch, dispatch existing first
if(embd.size()>0)
{
break;
}
else
{
//batch is empty, do image processing
int llavatokenscounted = 0;
int llavatokensevaled = 0;
while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_B))
{
last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.erase(last_n_tokens.begin());
last_n_tokens.push_back(embd_inp[input_consumed]); last_n_tokens.push_back(currtoken);
current_context_tokens.push_back(embd_inp[input_consumed]); current_context_tokens.push_back(currtoken);
++input_consumed;
++llavatokenscounted;
}
for(int i=0;i<llava_images.size();++i)
{
if(allow_regular_prints)
{
printf("\rProcessing LLaVa Embedding %d (%d tokens)",(i+1), llava_images[i].clp_image_tokens);
}
bool err = kcpp_eval_image(llama_ctx_v4,llava_images[i].clp_img_embd,llava_images[i].clp_image_tokens,kcpp_params->n_batch,&n_past);
llavatokensevaled += llava_images[i].clp_image_tokens;
if(!err)
{
llava_composite_image_signature = ""; //force invalidate
fprintf(stderr, "\nFailed to eval llava image at %d!\n",n_past);
output.text = nullptr;
output.status = 0;
generation_finished = true;
return output;
}
}
if(llavatokenscounted!=llavatokensevaled)
{
llava_composite_image_signature = ""; //force invalidate
fprintf(stderr, "\nLLAVA image tokens mismatch at %d! (%d vs %d tokens)\n",n_past,llavatokenscounted,llavatokensevaled);
output.text = nullptr;
output.status = 0;
generation_finished = true;
return output;
}
}
}
else
{
embd.push_back(currtoken);
last_n_tokens.erase(last_n_tokens.begin());
last_n_tokens.push_back(currtoken);
current_context_tokens.push_back(currtoken);
++input_consumed; ++input_consumed;
if ((int)embd.size() >= kcpp_params->n_batch) if ((int)embd.size() >= kcpp_params->n_batch)
{ {
break; break;
} }
} }
}
} }
} }

View file

@ -3519,6 +3519,8 @@ Current version: 122
saved_kai_addr: "", //do not ever share this in save files! saved_kai_addr: "", //do not ever share this in save files!
saved_oai_jailbreak: "", //customized oai system prompt saved_oai_jailbreak: "", //customized oai system prompt
saved_oai_jailbreak2: "", //oai assistant postfix saved_oai_jailbreak2: "", //oai assistant postfix
saved_claude_jailbreak: "", //claude system prompt
saved_claude_jailbreak2: "", //claude assistant postfix
saved_oai_custommodel: "", //customized oai custom model saved_oai_custommodel: "", //customized oai custom model
saved_oai_role: 0, //0=user,1=assistant,2=system saved_oai_role: 0, //0=user,1=assistant,2=system
saved_a1111_url: default_a1111_base, saved_a1111_url: default_a1111_base,
@ -4464,6 +4466,10 @@ Current version: 122
{ {
return (custom_kobold_endpoint!="" && koboldcpp_version && koboldcpp_version!="" && compare_version_str(koboldcpp_version, "1.49") >= 0); return (custom_kobold_endpoint!="" && koboldcpp_version && koboldcpp_version!="" && compare_version_str(koboldcpp_version, "1.49") >= 0);
} }
//checks whether llava images may be sent to the backend: requires a non-empty custom kobold endpoint
//and a known koboldcpp version for which compare_version_str(version, "1.61") >= 0
//(presumably meaning version 1.61 or newer - confirm against compare_version_str).
//the result is used to decide whether the images list is attached to the submit payload.
//note: because of the && chain, the result may be a falsy non-boolean (e.g. an empty/undefined version)
function is_using_kcpp_with_llava()
{
return (custom_kobold_endpoint!="" && koboldcpp_version && koboldcpp_version!="" && compare_version_str(koboldcpp_version, "1.61") >= 0);
}
//0 is none, 1 is pseudostreaming, 2 is true poll-streaming, 3 is sse-streaming //0 is none, 1 is pseudostreaming, 2 is true poll-streaming, 3 is sse-streaming
function determine_streaming_type() function determine_streaming_type()
@ -6861,7 +6867,8 @@ Current version: 122
document.getElementById("claudecustom").classList.remove("hidden"); document.getElementById("claudecustom").classList.remove("hidden");
document.getElementById("custom_claude_key").value = localsettings.saved_claude_key; document.getElementById("custom_claude_key").value = localsettings.saved_claude_key;
document.getElementById("custom_claude_endpoint").value = (localsettings.saved_claude_addr?localsettings.saved_claude_addr:default_claude_base); document.getElementById("custom_claude_endpoint").value = (localsettings.saved_claude_addr?localsettings.saved_claude_addr:default_claude_base);
document.getElementById("claudesystemprompt").value = localsettings.saved_claude_jailbreak;
document.getElementById("claudejailbreakprompt").value = localsettings.saved_claude_jailbreak2;
} }
else if(epchoice==4) else if(epchoice==4)
{ {
@ -7272,6 +7279,8 @@ Current version: 122
custom_claude_key = desired_claude_key; custom_claude_key = desired_claude_key;
localsettings.saved_claude_key = custom_claude_key; localsettings.saved_claude_key = custom_claude_key;
localsettings.saved_claude_addr = custom_claude_endpoint; localsettings.saved_claude_addr = custom_claude_endpoint;
localsettings.saved_claude_jailbreak = document.getElementById("claudesystemprompt").value;
localsettings.saved_claude_jailbreak2 = document.getElementById("claudejailbreakprompt").value;
custom_claude_model = document.getElementById("custom_claude_model").value.trim(); custom_claude_model = document.getElementById("custom_claude_model").value.trim();
selected_models = [{ "performance": 100.0, "queued": 0.0, "eta": 0, "name": custom_claude_model, "count": 1 }]; selected_models = [{ "performance": 100.0, "queued": 0.0, "eta": 0, "name": custom_claude_model, "count": 1 }];
@ -9723,6 +9732,10 @@ Current version: 122
{ {
submit_payload.params.memory = truncated_memory; submit_payload.params.memory = truncated_memory;
} }
if(is_using_kcpp_with_llava() && insertAIVisionImages.length>0)
{
submit_payload.params.images = insertAIVisionImages;
}
if(localsettings.sampler_seed>=1) if(localsettings.sampler_seed>=1)
{ {
@ -10535,7 +10548,7 @@ Current version: 122
compressImage(origImg, (newDataUri) => { compressImage(origImg, (newDataUri) => {
image_db[imgid].done = true; image_db[imgid].done = true;
image_db[imgid].result = newDataUri; image_db[imgid].result = newDataUri;
}, true, true, imgres,0.35,false); }, true, false, imgres,0.35,false);
}else{ }else{
image_db[imgid].queue = "Failed"; image_db[imgid].queue = "Failed";
msgbox("Image Generation Failed!\n\nPlease make sure A1111 is running and properly configured!\nIn your local install of Automatic1111 WebUi, modify webui-user.bat and add these flags to enable API access:\n\nset COMMANDLINE_ARGS= --api --listen --cors-allow-origins=*\n"); msgbox("Image Generation Failed!\n\nPlease make sure A1111 is running and properly configured!\nIn your local install of Automatic1111 WebUi, modify webui-user.bat and add these flags to enable API access:\n\nset COMMANDLINE_ARGS= --api --listen --cors-allow-origins=*\n");
@ -10574,14 +10587,14 @@ Current version: 122
} }
} }
function interrogate_new_image(base64img, imghash) function interrogate_new_image(base64img, imghash, use_horde=true)
{ {
let parts = base64img.split(','); let parts = base64img.split(',');
if (parts.length === 2 && parts[0].startsWith('data:image')) { if (parts.length === 2 && parts[0].startsWith('data:image')) {
base64img = parts[1]; base64img = parts[1];
} }
if(localsettings.generate_images_mode==2) //a1111 if(!use_horde) //a1111
{ {
let payload = { let payload = {
"image": base64img, "image": base64img,
@ -10657,15 +10670,15 @@ Current version: 122
let savedmeta = completed_imgs_meta[imghash]; let savedmeta = completed_imgs_meta[imghash];
if(savedmeta) if(savedmeta)
{ {
savedmeta.enabled = !savedmeta.enabled; savedmeta.visionmode = document.getElementById("aivisionmode").value;
if(!savedmeta.desc && savedmeta.enabled) if(!savedmeta.desc && (savedmeta.visionmode==1 || savedmeta.visionmode==2))
{ {
//request a new interrogation //request a new interrogation
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash); var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
if(!alreadysent) if(!alreadysent)
{ {
let b64 = document.getElementById("zoomedimg").src; let b64 = document.getElementById("zoomedimg").src;
interrogate_new_image(b64,imghash); interrogate_new_image(b64,imghash,(savedmeta.visionmode==1));
} }
} }
update_clicked_image(imghash); update_clicked_image(imghash);
@ -10681,7 +10694,7 @@ Current version: 122
let savedmeta = completed_imgs_meta[imghash]; let savedmeta = completed_imgs_meta[imghash];
if(!savedmeta && imghash!="") if(!savedmeta && imghash!="")
{ {
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", enabled:false, aspect:0}; savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0};
} }
if(savedmeta) if(savedmeta)
@ -10697,15 +10710,26 @@ Current version: 122
document.getElementById("zoomedimg").classList.add("landscape"); document.getElementById("zoomedimg").classList.add("landscape");
} }
if(!savedmeta.visionmode)
{
savedmeta.visionmode = 0;
}
let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description"); let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description");
latest_orig_prompt = origprompt; latest_orig_prompt = origprompt;
let visionstatus = (savedmeta.enabled?(savedmeta.desc?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing...</span>`):`<span class="color_red">Inactive</span>`); let visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:((savedmeta.desc||savedmeta.visionmode==3)?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing</span>`));
let togglebtn = (savedmeta.enabled?`<button type="button" class="bg_red btn btn-primary" style="width: 84px; padding: 2px; margin: 3px; font-size:12px;" onclick="toggle_ai_vision(\'`+imghash+`\')">Disable</button>`:`<button type="button" class="bg_green btn btn-primary" style="width: 84px; padding: 2px; margin: 3px; font-size:12px;" onclick="toggle_ai_vision(\'`+imghash+`\')">👁️ Enable 👁️</button>`); let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 134px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
<option value="0">Disabled</option>
<option value="1">Interrogate (Horde)</option>
<option value="2">Interrogate (A1111)</option>
<option value="3">Multimodal (LLaVA)</option>
</select>`;
document.getElementById("zoomedimgdesc").innerHTML = ` document.getElementById("zoomedimgdesc").innerHTML = `
AI Vision: `+visionstatus+` <span class="helpicon">?<span class="helptext">This allows the AI to visually recognize this image, to see and react to this image. Uses Horde or Local A1111 for image interrogation if enabled.</span></span> AI Vision: `+visionstatus+` <span class="helpicon">?<span class="helptext">This allows the AI to visually recognize this image, to see and react to this image. Uses Horde or Local A1111 for image interrogation if enabled.</span></span>
`+togglebtn+` `+togglebtn+`
<br><button type="button" class="btn btn-primary" style="width: 140px; padding: 2px; margin: 3px; font-size:12px;" onclick="show_orig_prompt()">View Original Prompt</button> <br><button type="button" class="btn btn-primary" style="width: 140px; padding: 2px; margin: 3px; font-size:12px;" onclick="show_orig_prompt()">View Original Prompt</button>
`; `;
document.getElementById("aivisionmode").value = savedmeta.visionmode;
} }
else else
{ {
@ -11096,7 +11120,7 @@ Current version: 122
let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX; let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX;
compressImage(origImg, (newDataUri) => { compressImage(origImg, (newDataUri) => {
img.result = newDataUri; img.result = newDataUri;
}, true, true, imgres,0.35,false); }, true, false, imgres,0.35,false);
} }
}) })
.catch((error) => { .catch((error) => {
@ -11135,7 +11159,7 @@ Current version: 122
console.log("Replacing with Image: " + matchstr); console.log("Replacing with Image: " + matchstr);
gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr); gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
let metaid = cyrb_hash(img.result); let metaid = cyrb_hash(img.result);
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", enabled:false, aspect:image_db[key].aspect}; completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:0, aspect:image_db[key].aspect};
delete image_db[key]; delete image_db[key];
} }
} }
@ -11167,23 +11191,15 @@ Current version: 122
if(!fixedSize) if(!fixedSize)
{ {
//otherwise, we preserve the original ratio but scale them down to fit //otherwise, we preserve the original ratio but scale them down to fit
let newwidth = maxSize; let maxImgDim = Math.max(origW,origH);
let newheight = maxSize; wantedWidth = origW;
let scalef = 1; wantedHeight = origH;
if(origW>=origH) if(maxImgDim > maxSize)
{ {
newwidth = origW>maxSize?maxSize:origW; let scalef = maxImgDim/maxSize;
scalef = newwidth/origW; wantedWidth = origW/scalef;
newheight = origH*scalef; wantedHeight = origH/scalef;
} }
else
{
newheight = origH>maxSize?maxSize:origH;
scalef = newheight/origH;
newwidth = origW*scalef;
}
wantedWidth = newwidth;
wantedHeight = newheight;
} }
canvas.width = wantedWidth; canvas.width = wantedWidth;
@ -11612,6 +11628,7 @@ Current version: 122
} }
} }
var insertAIVisionImages = []; //concat gametext will populate this
function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false,insertAIVision=false) { function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false,insertAIVision=false) {
let fulltxt = ""; let fulltxt = "";
for (let i = 0; i < gametext_arr.length; ++i) { for (let i = 0; i < gametext_arr.length; ++i) {
@ -11646,9 +11663,6 @@ Current version: 122
let a = escapeHtml(localsettings.chatname); let a = escapeHtml(localsettings.chatname);
fulltxt = replaceAll(fulltxt,a,localsettings.chatname); fulltxt = replaceAll(fulltxt,a,localsettings.chatname);
// let b = escapeHtml(localsettings.chatopponent);
// fulltxt = replaceAll(fulltxt,b,localsettings.chatopponent);
//unescape other chat opponents too (match anything that is NOT us) //unescape other chat opponents too (match anything that is NOT us)
var regex = new RegExp("\n(?!" + localsettings.chatname + ").+?\: ", "gi"); var regex = new RegExp("\n(?!" + localsettings.chatname + ").+?\: ", "gi");
fulltxt = fulltxt.replace(regex, function (m) { fulltxt = fulltxt.replace(regex, function (m) {
@ -11667,14 +11681,26 @@ Current version: 122
{ {
if(insertAIVision) if(insertAIVision)
{ {
insertAIVisionImages = []; //a bit hacky
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) { fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
// m here means the whole matched string // m here means the whole matched string
let inner = m.substring(5, m.length - 5); let inner = m.substring(5, m.length - 5);
let imghash = cyrb_hash(inner); let imghash = cyrb_hash(inner);
let foundmeta = completed_imgs_meta[imghash]; let foundmeta = completed_imgs_meta[imghash];
if (foundmeta != null && foundmeta.enabled && foundmeta.desc) { if (foundmeta != null) {
if(foundmeta.desc && (foundmeta.visionmode==1||foundmeta.visionmode==2))
{
return "\n(Attached Image: " + foundmeta.desc + ")\n"; return "\n(Attached Image: " + foundmeta.desc + ")\n";
} }
else if(foundmeta.visionmode==3)
{
let parts = inner.split(',');
if (parts.length === 2 && parts[0].startsWith('data:image')) {
insertAIVisionImages.push(parts[1]);
}
return "\n(Attached Image)\n";
}
}
return ""; return "";
}); });
} }