mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
llava support is now fully functioning
This commit is contained in:
parent
d943c739a8
commit
484d90c330
2 changed files with 193 additions and 47 deletions
|
@ -33,6 +33,11 @@
|
||||||
#include "examples/llava/clip.h"
|
#include "examples/llava/clip.h"
|
||||||
#include "examples/llava/llava.h"
|
#include "examples/llava/llava.h"
|
||||||
|
|
||||||
|
//const
|
||||||
|
const int extra_context_handle_fragmentation = 80;
|
||||||
|
const int LLAVA_TOKEN_IDENTIFIER_A = -998; //alternate between both, changing when image changes
|
||||||
|
const int LLAVA_TOKEN_IDENTIFIER_B = -999;
|
||||||
|
|
||||||
//shared
|
//shared
|
||||||
std::string executable_path = "";
|
std::string executable_path = "";
|
||||||
std::string lora_filename = "";
|
std::string lora_filename = "";
|
||||||
|
@ -80,6 +85,8 @@ static llama_context * llama_ctx_v4;
|
||||||
static clip_ctx * clp_ctx = nullptr; //for llava
|
static clip_ctx * clp_ctx = nullptr; //for llava
|
||||||
static clip_image_u8 * clp_img_data = nullptr; //most recent image
|
static clip_image_u8 * clp_img_data = nullptr; //most recent image
|
||||||
static std::vector<llava_image> llava_images;
|
static std::vector<llava_image> llava_images;
|
||||||
|
static std::string llava_composite_image_signature = ""; //for identifying when the llava images change, we need to invalidate the cache
|
||||||
|
static int current_llava_identifier = LLAVA_TOKEN_IDENTIFIER_A;
|
||||||
|
|
||||||
static gpt_params * kcpp_params = nullptr;
|
static gpt_params * kcpp_params = nullptr;
|
||||||
static int max_context_limit_at_load = 0;
|
static int max_context_limit_at_load = 0;
|
||||||
|
@ -105,8 +112,6 @@ static std::string concat_output_reader_copy_poll = ""; //for streaming
|
||||||
static std::string concat_output_reader_copy_res = ""; //for gen response
|
static std::string concat_output_reader_copy_res = ""; //for gen response
|
||||||
static std::vector<logit_bias> logit_biases;
|
static std::vector<logit_bias> logit_biases;
|
||||||
|
|
||||||
const int extra_context_handle_fragmentation = 80;
|
|
||||||
|
|
||||||
inline bool IsNanCheck(float f)
|
inline bool IsNanCheck(float f)
|
||||||
{
|
{
|
||||||
const unsigned int u = *(unsigned int*)&f;
|
const unsigned int u = *(unsigned int*)&f;
|
||||||
|
@ -1080,7 +1085,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(mmproj_filename != "")
|
if(mmproj_filename != "" && file_format==FileFormat::GGUF_GENERIC)
|
||||||
{
|
{
|
||||||
printf("\nAttempting to apply Multimodal Projector: %s\n", mmproj_filename.c_str());
|
printf("\nAttempting to apply Multimodal Projector: %s\n", mmproj_filename.c_str());
|
||||||
clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1);
|
clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1);
|
||||||
|
@ -1593,6 +1598,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
llava_images.clear();
|
llava_images.clear();
|
||||||
|
std::string new_llava_composite = "";
|
||||||
for(int x=0;x<images_max;++x)
|
for(int x=0;x<images_max;++x)
|
||||||
{
|
{
|
||||||
std::string item = inputs.images[x];
|
std::string item = inputs.images[x];
|
||||||
|
@ -1601,6 +1607,17 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
llava_image lv;
|
llava_image lv;
|
||||||
lv.b64data = item;
|
lv.b64data = item;
|
||||||
llava_images.push_back(lv);
|
llava_images.push_back(lv);
|
||||||
|
new_llava_composite += item;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(llava_composite_image_signature!=new_llava_composite)
|
||||||
|
{
|
||||||
|
//images have changed. swap identifiers to force reprocessing
|
||||||
|
current_llava_identifier = (current_llava_identifier==LLAVA_TOKEN_IDENTIFIER_A?LLAVA_TOKEN_IDENTIFIER_B:LLAVA_TOKEN_IDENTIFIER_A);
|
||||||
|
llava_composite_image_signature = new_llava_composite;
|
||||||
|
if(debugmode==1)
|
||||||
|
{
|
||||||
|
printf("\nLLAVA images changed, existing cache invalidated");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1667,6 +1684,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
// tokenize the prompt
|
// tokenize the prompt
|
||||||
std::vector<int> embd_inp;
|
std::vector<int> embd_inp;
|
||||||
std::vector<int> embd_inp_mem; //for storing added memory
|
std::vector<int> embd_inp_mem; //for storing added memory
|
||||||
|
std::vector<int> llava_mem; //for storing dummy tokens that will be consumed by llava
|
||||||
|
|
||||||
|
int32_t nctx = kcpp_params->n_ctx;
|
||||||
|
|
||||||
TokenizeString(kcpp_params->prompt, embd_inp, file_format);
|
TokenizeString(kcpp_params->prompt, embd_inp, file_format);
|
||||||
|
|
||||||
if(clp_ctx!=nullptr && clp_img_data!=nullptr)
|
if(clp_ctx!=nullptr && clp_img_data!=nullptr)
|
||||||
|
@ -1686,7 +1707,20 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_params->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) {
|
if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_params->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) {
|
||||||
printf("\nError: Clip image %d failed to create embd!",i);
|
printf("\nError: Clip image %d failed to create embd!",i);
|
||||||
}
|
}
|
||||||
printf("\nLLAVA Clip Embed %i used Tokens: %d",i,llava_images[i].clp_image_tokens);
|
if(debugmode==1)
|
||||||
|
{
|
||||||
|
printf("\nLLAVA Clip Embed %i used Tokens: %d",i,llava_images[i].clp_image_tokens);
|
||||||
|
}
|
||||||
|
if(llava_images[i].clp_image_tokens>0 && llava_images[i].clp_image_tokens < nctx)
|
||||||
|
{
|
||||||
|
for(int n=0;n<llava_images[i].clp_image_tokens;++n)
|
||||||
|
{
|
||||||
|
llava_mem.push_back(current_llava_identifier);
|
||||||
|
}
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
printf("\nWarning: LLAVA Image excluded - Context size too low or not enough clip tokens!\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1697,8 +1731,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
}
|
}
|
||||||
|
|
||||||
//truncate to front of the prompt if its too long
|
//truncate to front of the prompt if its too long
|
||||||
int32_t nctx = kcpp_params->n_ctx;
|
|
||||||
|
|
||||||
if (embd_inp.size() + kcpp_params->n_predict > nctx)
|
if (embd_inp.size() + kcpp_params->n_predict > nctx)
|
||||||
{
|
{
|
||||||
//get bos token
|
//get bos token
|
||||||
|
@ -1713,8 +1745,43 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(llava_mem.size()>0) //stick the llava mem before the added mem
|
||||||
|
{
|
||||||
|
if(llava_mem.size() + kcpp_params->n_predict + 4 > nctx)
|
||||||
|
{
|
||||||
|
printf("\nWarning: Too many LLaVA tokens, max context exceeded! They will be ignored!\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::vector<int> bos;
|
||||||
|
TokenizeString("", bos, file_format);
|
||||||
|
if(embd_inp_mem.size()>0) //remove existing bos if exists
|
||||||
|
{
|
||||||
|
if (bos.size()>0 && !embd_inp_mem.empty() && bos[0]==embd_inp_mem[0]) {
|
||||||
|
embd_inp_mem.erase(embd_inp_mem.begin());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//append llava dummy tokens
|
||||||
|
embd_inp_mem.insert(embd_inp_mem.begin(), llava_mem.begin(), llava_mem.end());
|
||||||
|
if (bos.size() > 0 && embd_inp_mem.size() > 0)
|
||||||
|
{
|
||||||
|
embd_inp_mem.insert(embd_inp_mem.begin(), bos[0]); //insert bos at front
|
||||||
|
}
|
||||||
|
|
||||||
|
//shorten memory if needed
|
||||||
|
if (embd_inp_mem.size() + kcpp_params->n_predict + 4 > nctx)
|
||||||
|
{
|
||||||
|
int limit = nctx - (kcpp_params->n_predict + 4);
|
||||||
|
if (embd_inp_mem.size() > limit) {
|
||||||
|
embd_inp_mem.resize(limit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//added special memory, overwrite if needed
|
//added special memory, overwrite if needed
|
||||||
if(addedmemory!="")
|
if(embd_inp_mem.size()>0)
|
||||||
{
|
{
|
||||||
//remove bos token from prompt, it'll be taken from memory
|
//remove bos token from prompt, it'll be taken from memory
|
||||||
std::vector<int> bos;
|
std::vector<int> bos;
|
||||||
|
@ -1750,7 +1817,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
|
|
||||||
//stick memory to front of prompt
|
//stick memory to front of prompt
|
||||||
embd_inp.insert(embd_inp.begin(), embd_inp_mem.begin(), embd_inp_mem.end());
|
embd_inp.insert(embd_inp.begin(), embd_inp_mem.begin(), embd_inp_mem.end());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//determine how much npast we have to rewind from the current state
|
//determine how much npast we have to rewind from the current state
|
||||||
|
@ -2148,15 +2214,69 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
// some user input remains from prompt or interaction, forward it to processing
|
// some user input remains from prompt or interaction, forward it to processing
|
||||||
while ((int)embd_inp.size() > input_consumed)
|
while ((int)embd_inp.size() > input_consumed)
|
||||||
{
|
{
|
||||||
embd.push_back(embd_inp[input_consumed]);
|
int currtoken = embd_inp[input_consumed];
|
||||||
last_n_tokens.erase(last_n_tokens.begin());
|
if(currtoken==LLAVA_TOKEN_IDENTIFIER_A || currtoken==LLAVA_TOKEN_IDENTIFIER_B) //special llava token hit
|
||||||
last_n_tokens.push_back(embd_inp[input_consumed]);
|
|
||||||
current_context_tokens.push_back(embd_inp[input_consumed]);
|
|
||||||
++input_consumed;
|
|
||||||
if ((int)embd.size() >= kcpp_params->n_batch)
|
|
||||||
{
|
{
|
||||||
break;
|
//if partial batch, dispatch existing first
|
||||||
|
if(embd.size()>0)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//batch is empty, do image processing
|
||||||
|
int llavatokenscounted = 0;
|
||||||
|
int llavatokensevaled = 0;
|
||||||
|
while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_B))
|
||||||
|
{
|
||||||
|
last_n_tokens.erase(last_n_tokens.begin());
|
||||||
|
last_n_tokens.push_back(currtoken);
|
||||||
|
current_context_tokens.push_back(currtoken);
|
||||||
|
++input_consumed;
|
||||||
|
++llavatokenscounted;
|
||||||
|
}
|
||||||
|
for(int i=0;i<llava_images.size();++i)
|
||||||
|
{
|
||||||
|
if(allow_regular_prints)
|
||||||
|
{
|
||||||
|
printf("\rProcessing LLaVa Embedding %d (%d tokens)",(i+1), llava_images[i].clp_image_tokens);
|
||||||
|
}
|
||||||
|
bool err = kcpp_eval_image(llama_ctx_v4,llava_images[i].clp_img_embd,llava_images[i].clp_image_tokens,kcpp_params->n_batch,&n_past);
|
||||||
|
llavatokensevaled += llava_images[i].clp_image_tokens;
|
||||||
|
if(!err)
|
||||||
|
{
|
||||||
|
llava_composite_image_signature = ""; //force invalidate
|
||||||
|
fprintf(stderr, "\nFailed to eval llava image at %d!\n",n_past);
|
||||||
|
output.text = nullptr;
|
||||||
|
output.status = 0;
|
||||||
|
generation_finished = true;
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(llavatokenscounted!=llavatokensevaled)
|
||||||
|
{
|
||||||
|
llava_composite_image_signature = ""; //force invalidate
|
||||||
|
fprintf(stderr, "\nLLAVA image tokens mismatch at %d! (%d vs %d tokens)\n",n_past,llavatokenscounted,llavatokensevaled);
|
||||||
|
output.text = nullptr;
|
||||||
|
output.status = 0;
|
||||||
|
generation_finished = true;
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
embd.push_back(currtoken);
|
||||||
|
last_n_tokens.erase(last_n_tokens.begin());
|
||||||
|
last_n_tokens.push_back(currtoken);
|
||||||
|
current_context_tokens.push_back(currtoken);
|
||||||
|
++input_consumed;
|
||||||
|
if ((int)embd.size() >= kcpp_params->n_batch)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
90
klite.embd
90
klite.embd
|
@ -3519,6 +3519,8 @@ Current version: 122
|
||||||
saved_kai_addr: "", //do not ever share this in save files!
|
saved_kai_addr: "", //do not ever share this in save files!
|
||||||
saved_oai_jailbreak: "", //customized oai system prompt
|
saved_oai_jailbreak: "", //customized oai system prompt
|
||||||
saved_oai_jailbreak2: "", //oai assistant postfix
|
saved_oai_jailbreak2: "", //oai assistant postfix
|
||||||
|
saved_claude_jailbreak: "", //claude system prompt
|
||||||
|
saved_claude_jailbreak2: "", //claude assistant postfix
|
||||||
saved_oai_custommodel: "", //customized oai custom model
|
saved_oai_custommodel: "", //customized oai custom model
|
||||||
saved_oai_role: 0, //0=user,1=assistant,2=system
|
saved_oai_role: 0, //0=user,1=assistant,2=system
|
||||||
saved_a1111_url: default_a1111_base,
|
saved_a1111_url: default_a1111_base,
|
||||||
|
@ -4464,6 +4466,10 @@ Current version: 122
|
||||||
{
|
{
|
||||||
return (custom_kobold_endpoint!="" && koboldcpp_version && koboldcpp_version!="" && compare_version_str(koboldcpp_version, "1.49") >= 0);
|
return (custom_kobold_endpoint!="" && koboldcpp_version && koboldcpp_version!="" && compare_version_str(koboldcpp_version, "1.49") >= 0);
|
||||||
}
|
}
|
||||||
|
function is_using_kcpp_with_llava()
|
||||||
|
{
|
||||||
|
return (custom_kobold_endpoint!="" && koboldcpp_version && koboldcpp_version!="" && compare_version_str(koboldcpp_version, "1.61") >= 0);
|
||||||
|
}
|
||||||
|
|
||||||
//0 is none, 1 is pseudostreaming, 2 is true poll-streaming, 3 is sse-streaming
|
//0 is none, 1 is pseudostreaming, 2 is true poll-streaming, 3 is sse-streaming
|
||||||
function determine_streaming_type()
|
function determine_streaming_type()
|
||||||
|
@ -6861,7 +6867,8 @@ Current version: 122
|
||||||
document.getElementById("claudecustom").classList.remove("hidden");
|
document.getElementById("claudecustom").classList.remove("hidden");
|
||||||
document.getElementById("custom_claude_key").value = localsettings.saved_claude_key;
|
document.getElementById("custom_claude_key").value = localsettings.saved_claude_key;
|
||||||
document.getElementById("custom_claude_endpoint").value = (localsettings.saved_claude_addr?localsettings.saved_claude_addr:default_claude_base);
|
document.getElementById("custom_claude_endpoint").value = (localsettings.saved_claude_addr?localsettings.saved_claude_addr:default_claude_base);
|
||||||
|
document.getElementById("claudesystemprompt").value = localsettings.saved_claude_jailbreak;
|
||||||
|
document.getElementById("claudejailbreakprompt").value = localsettings.saved_claude_jailbreak2;
|
||||||
}
|
}
|
||||||
else if(epchoice==4)
|
else if(epchoice==4)
|
||||||
{
|
{
|
||||||
|
@ -7272,6 +7279,8 @@ Current version: 122
|
||||||
custom_claude_key = desired_claude_key;
|
custom_claude_key = desired_claude_key;
|
||||||
localsettings.saved_claude_key = custom_claude_key;
|
localsettings.saved_claude_key = custom_claude_key;
|
||||||
localsettings.saved_claude_addr = custom_claude_endpoint;
|
localsettings.saved_claude_addr = custom_claude_endpoint;
|
||||||
|
localsettings.saved_claude_jailbreak = document.getElementById("claudesystemprompt").value;
|
||||||
|
localsettings.saved_claude_jailbreak2 = document.getElementById("claudejailbreakprompt").value;
|
||||||
custom_claude_model = document.getElementById("custom_claude_model").value.trim();
|
custom_claude_model = document.getElementById("custom_claude_model").value.trim();
|
||||||
|
|
||||||
selected_models = [{ "performance": 100.0, "queued": 0.0, "eta": 0, "name": custom_claude_model, "count": 1 }];
|
selected_models = [{ "performance": 100.0, "queued": 0.0, "eta": 0, "name": custom_claude_model, "count": 1 }];
|
||||||
|
@ -9723,6 +9732,10 @@ Current version: 122
|
||||||
{
|
{
|
||||||
submit_payload.params.memory = truncated_memory;
|
submit_payload.params.memory = truncated_memory;
|
||||||
}
|
}
|
||||||
|
if(is_using_kcpp_with_llava() && insertAIVisionImages.length>0)
|
||||||
|
{
|
||||||
|
submit_payload.params.images = insertAIVisionImages;
|
||||||
|
}
|
||||||
|
|
||||||
if(localsettings.sampler_seed>=1)
|
if(localsettings.sampler_seed>=1)
|
||||||
{
|
{
|
||||||
|
@ -10535,7 +10548,7 @@ Current version: 122
|
||||||
compressImage(origImg, (newDataUri) => {
|
compressImage(origImg, (newDataUri) => {
|
||||||
image_db[imgid].done = true;
|
image_db[imgid].done = true;
|
||||||
image_db[imgid].result = newDataUri;
|
image_db[imgid].result = newDataUri;
|
||||||
}, true, true, imgres,0.35,false);
|
}, true, false, imgres,0.35,false);
|
||||||
}else{
|
}else{
|
||||||
image_db[imgid].queue = "Failed";
|
image_db[imgid].queue = "Failed";
|
||||||
msgbox("Image Generation Failed!\n\nPlease make sure A1111 is running and properly configured!\nIn your local install of Automatic1111 WebUi, modify webui-user.bat and add these flags to enable API access:\n\nset COMMANDLINE_ARGS= --api --listen --cors-allow-origins=*\n");
|
msgbox("Image Generation Failed!\n\nPlease make sure A1111 is running and properly configured!\nIn your local install of Automatic1111 WebUi, modify webui-user.bat and add these flags to enable API access:\n\nset COMMANDLINE_ARGS= --api --listen --cors-allow-origins=*\n");
|
||||||
|
@ -10574,14 +10587,14 @@ Current version: 122
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function interrogate_new_image(base64img, imghash)
|
function interrogate_new_image(base64img, imghash, use_horde=true)
|
||||||
{
|
{
|
||||||
let parts = base64img.split(',');
|
let parts = base64img.split(',');
|
||||||
if (parts.length === 2 && parts[0].startsWith('data:image')) {
|
if (parts.length === 2 && parts[0].startsWith('data:image')) {
|
||||||
base64img = parts[1];
|
base64img = parts[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
if(localsettings.generate_images_mode==2) //a1111
|
if(!use_horde) //a1111
|
||||||
{
|
{
|
||||||
let payload = {
|
let payload = {
|
||||||
"image": base64img,
|
"image": base64img,
|
||||||
|
@ -10657,15 +10670,15 @@ Current version: 122
|
||||||
let savedmeta = completed_imgs_meta[imghash];
|
let savedmeta = completed_imgs_meta[imghash];
|
||||||
if(savedmeta)
|
if(savedmeta)
|
||||||
{
|
{
|
||||||
savedmeta.enabled = !savedmeta.enabled;
|
savedmeta.visionmode = document.getElementById("aivisionmode").value;
|
||||||
if(!savedmeta.desc && savedmeta.enabled)
|
if(!savedmeta.desc && (savedmeta.visionmode==1 || savedmeta.visionmode==2))
|
||||||
{
|
{
|
||||||
//request a new interrogation
|
//request a new interrogation
|
||||||
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
|
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
|
||||||
if(!alreadysent)
|
if(!alreadysent)
|
||||||
{
|
{
|
||||||
let b64 = document.getElementById("zoomedimg").src;
|
let b64 = document.getElementById("zoomedimg").src;
|
||||||
interrogate_new_image(b64,imghash);
|
interrogate_new_image(b64,imghash,(savedmeta.visionmode==1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
update_clicked_image(imghash);
|
update_clicked_image(imghash);
|
||||||
|
@ -10681,7 +10694,7 @@ Current version: 122
|
||||||
let savedmeta = completed_imgs_meta[imghash];
|
let savedmeta = completed_imgs_meta[imghash];
|
||||||
if(!savedmeta && imghash!="")
|
if(!savedmeta && imghash!="")
|
||||||
{
|
{
|
||||||
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", enabled:false, aspect:0};
|
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0};
|
||||||
}
|
}
|
||||||
|
|
||||||
if(savedmeta)
|
if(savedmeta)
|
||||||
|
@ -10697,15 +10710,26 @@ Current version: 122
|
||||||
document.getElementById("zoomedimg").classList.add("landscape");
|
document.getElementById("zoomedimg").classList.add("landscape");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!savedmeta.visionmode)
|
||||||
|
{
|
||||||
|
savedmeta.visionmode = 0;
|
||||||
|
}
|
||||||
|
|
||||||
let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description");
|
let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description");
|
||||||
latest_orig_prompt = origprompt;
|
latest_orig_prompt = origprompt;
|
||||||
let visionstatus = (savedmeta.enabled?(savedmeta.desc?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing...</span>`):`<span class="color_red">Inactive</span>`);
|
let visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:((savedmeta.desc||savedmeta.visionmode==3)?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing</span>`));
|
||||||
let togglebtn = (savedmeta.enabled?`<button type="button" class="bg_red btn btn-primary" style="width: 84px; padding: 2px; margin: 3px; font-size:12px;" onclick="toggle_ai_vision(\'`+imghash+`\')">Disable</button>`:`<button type="button" class="bg_green btn btn-primary" style="width: 84px; padding: 2px; margin: 3px; font-size:12px;" onclick="toggle_ai_vision(\'`+imghash+`\')">👁️ Enable 👁️</button>`);
|
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 134px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
|
||||||
|
<option value="0">Disabled</option>
|
||||||
|
<option value="1">Interrogate (Horde)</option>
|
||||||
|
<option value="2">Interrogate (A1111)</option>
|
||||||
|
<option value="3">Multimodal (LLaVA)</option>
|
||||||
|
</select>`;
|
||||||
document.getElementById("zoomedimgdesc").innerHTML = `
|
document.getElementById("zoomedimgdesc").innerHTML = `
|
||||||
AI Vision: `+visionstatus+` <span class="helpicon">?<span class="helptext">This allows the AI to visually recognize this image, to see and react to this image. Uses Horde or Local A1111 for image interrogation if enabled.</span></span>
|
AI Vision: `+visionstatus+` <span class="helpicon">?<span class="helptext">This allows the AI to visually recognize this image, to see and react to this image. Uses Horde or Local A1111 for image interrogation if enabled.</span></span>
|
||||||
`+togglebtn+`
|
`+togglebtn+`
|
||||||
<br><button type="button" class="btn btn-primary" style="width: 140px; padding: 2px; margin: 3px; font-size:12px;" onclick="show_orig_prompt()">View Original Prompt</button>
|
<br><button type="button" class="btn btn-primary" style="width: 140px; padding: 2px; margin: 3px; font-size:12px;" onclick="show_orig_prompt()">View Original Prompt</button>
|
||||||
`;
|
`;
|
||||||
|
document.getElementById("aivisionmode").value = savedmeta.visionmode;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -11096,7 +11120,7 @@ Current version: 122
|
||||||
let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX;
|
let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX;
|
||||||
compressImage(origImg, (newDataUri) => {
|
compressImage(origImg, (newDataUri) => {
|
||||||
img.result = newDataUri;
|
img.result = newDataUri;
|
||||||
}, true, true, imgres,0.35,false);
|
}, true, false, imgres,0.35,false);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
|
@ -11135,7 +11159,7 @@ Current version: 122
|
||||||
console.log("Replacing with Image: " + matchstr);
|
console.log("Replacing with Image: " + matchstr);
|
||||||
gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
|
gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
|
||||||
let metaid = cyrb_hash(img.result);
|
let metaid = cyrb_hash(img.result);
|
||||||
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", enabled:false, aspect:image_db[key].aspect};
|
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:0, aspect:image_db[key].aspect};
|
||||||
delete image_db[key];
|
delete image_db[key];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -11167,23 +11191,15 @@ Current version: 122
|
||||||
if(!fixedSize)
|
if(!fixedSize)
|
||||||
{
|
{
|
||||||
//otherwise, we preserve the original ratio but scale them down to fit
|
//otherwise, we preserve the original ratio but scale them down to fit
|
||||||
let newwidth = maxSize;
|
let maxImgDim = Math.max(origW,origH);
|
||||||
let newheight = maxSize;
|
wantedWidth = origW;
|
||||||
let scalef = 1;
|
wantedHeight = origH;
|
||||||
if(origW>=origH)
|
if(maxImgDim > maxSize)
|
||||||
{
|
{
|
||||||
newwidth = origW>maxSize?maxSize:origW;
|
let scalef = maxImgDim/maxSize;
|
||||||
scalef = newwidth/origW;
|
wantedWidth = origW/scalef;
|
||||||
newheight = origH*scalef;
|
wantedHeight = origH/scalef;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
newheight = origH>maxSize?maxSize:origH;
|
|
||||||
scalef = newheight/origH;
|
|
||||||
newwidth = origW*scalef;
|
|
||||||
}
|
|
||||||
wantedWidth = newwidth;
|
|
||||||
wantedHeight = newheight;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
canvas.width = wantedWidth;
|
canvas.width = wantedWidth;
|
||||||
|
@ -11612,6 +11628,7 @@ Current version: 122
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var insertAIVisionImages = []; //concat gametext will populate this
|
||||||
function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false,insertAIVision=false) {
|
function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false,insertAIVision=false) {
|
||||||
let fulltxt = "";
|
let fulltxt = "";
|
||||||
for (let i = 0; i < gametext_arr.length; ++i) {
|
for (let i = 0; i < gametext_arr.length; ++i) {
|
||||||
|
@ -11646,9 +11663,6 @@ Current version: 122
|
||||||
let a = escapeHtml(localsettings.chatname);
|
let a = escapeHtml(localsettings.chatname);
|
||||||
fulltxt = replaceAll(fulltxt,a,localsettings.chatname);
|
fulltxt = replaceAll(fulltxt,a,localsettings.chatname);
|
||||||
|
|
||||||
// let b = escapeHtml(localsettings.chatopponent);
|
|
||||||
// fulltxt = replaceAll(fulltxt,b,localsettings.chatopponent);
|
|
||||||
|
|
||||||
//unescape other chat opponents too (match anything that is NOT us)
|
//unescape other chat opponents too (match anything that is NOT us)
|
||||||
var regex = new RegExp("\n(?!" + localsettings.chatname + ").+?\: ", "gi");
|
var regex = new RegExp("\n(?!" + localsettings.chatname + ").+?\: ", "gi");
|
||||||
fulltxt = fulltxt.replace(regex, function (m) {
|
fulltxt = fulltxt.replace(regex, function (m) {
|
||||||
|
@ -11667,13 +11681,25 @@ Current version: 122
|
||||||
{
|
{
|
||||||
if(insertAIVision)
|
if(insertAIVision)
|
||||||
{
|
{
|
||||||
|
insertAIVisionImages = []; //a bit hacky
|
||||||
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
|
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
|
||||||
// m here means the whole matched string
|
// m here means the whole matched string
|
||||||
let inner = m.substring(5, m.length - 5);
|
let inner = m.substring(5, m.length - 5);
|
||||||
let imghash = cyrb_hash(inner);
|
let imghash = cyrb_hash(inner);
|
||||||
let foundmeta = completed_imgs_meta[imghash];
|
let foundmeta = completed_imgs_meta[imghash];
|
||||||
if (foundmeta != null && foundmeta.enabled && foundmeta.desc) {
|
if (foundmeta != null) {
|
||||||
return "\n(Attached Image: " + foundmeta.desc + ")\n";
|
if(foundmeta.desc && (foundmeta.visionmode==1||foundmeta.visionmode==2))
|
||||||
|
{
|
||||||
|
return "\n(Attached Image: " + foundmeta.desc + ")\n";
|
||||||
|
}
|
||||||
|
else if(foundmeta.visionmode==3)
|
||||||
|
{
|
||||||
|
let parts = inner.split(',');
|
||||||
|
if (parts.length === 2 && parts[0].startsWith('data:image')) {
|
||||||
|
insertAIVisionImages.push(parts[1]);
|
||||||
|
}
|
||||||
|
return "\n(Attached Image)\n";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
});
|
});
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue