From 1ae0a524ad024ac9060f2445054cfe15bf0482aa Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 15 Feb 2024 10:51:41 +0800 Subject: [PATCH] added ai vision --- klite.embd | 392 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 334 insertions(+), 58 deletions(-) diff --git a/klite.embd b/klite.embd index a0199ad3a..4bf6f7f47 100644 --- a/klite.embd +++ b/klite.embd @@ -6,7 +6,7 @@ It requires no dependencies, installation or setup. Just copy this single static HTML file anywhere and open it in a browser, or from a webserver. Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite. Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line. -Current version: 111 +Current version: 112 -Concedo --> @@ -1509,6 +1509,7 @@ Current version: 111 max-height: 120px; overflow-y: auto; overflow-x: hidden; + font-size: 12px; } .mdlpicker::-webkit-calendar-picker-indicator { opacity: 100; @@ -3294,6 +3295,8 @@ Current version: 111 const stablehorde_poll_endpoint = stablehorde_url + "/api/v2/generate/check"; const stablehorde_output_endpoint = stablehorde_url + "/api/v2/generate/status"; const stablehorde_model_endpoint = stablehorde_url + "/api/v2/status/models"; + const stablehorde_submit_interrogate_endpoint = stablehorde_url + "/api/v2/interrogate/async"; + const stablehorde_output_interrogate_endpoint = stablehorde_url + "/api/v2/interrogate/status"; const kobold_custom_gen_endpoint = "/api/v1/generate"; const kobold_custom_gen_stream_endpoint = "/api/extra/generate/stream"; @@ -3327,6 +3330,7 @@ Current version: 111 const a1111_models_endpoint = "/sdapi/v1/sd-models"; const a1111_options_endpoint = "/sdapi/v1/options"; const a1111_txt2img_endpoint = "/sdapi/v1/txt2img"; + const a1111_interrogate_endpoint = "/sdapi/v1/interrogate"; const xtts_gen_endpoint = "/tts_to_audio/"; const xtts_voices_endpoint = "/speakers_list"; @@ -3345,6 +3349,8 @@ Current version: 111 const default_xtts_base = " http://localhost:8020"; const XTTS_ID = 1000; + const HD_RES_PX = 400; + const NO_HD_RES_PX = 256; //all configurable globals var perfdata = null; //if it's null, we are not connected @@ -3379,6 +3385,7 @@ Current version: 111 var generateimagesinterval = 650; //if generated images is enabled, it will trigger after every 600 new characters in context. var nextgeneratedimagemilestone = generateimagesinterval; //used to keep track of when to generate the next image var image_db = {}; //stores a dictionary of pending images + var interrogation_db = {}; var completed_imgs_meta = {}; //stores temp info on completed images like alt text //key is ID, body is {done:false,queue:10,result:""} var stablemodels = [{"name": "stable_diffusion","count": 1}]; //stored as {name,count} @@ -4703,6 +4710,7 @@ Current version: 111 let new_save_storyobj = generate_base_storyobj(); let export_arr = gametext_arr; + let export_hashes = {}; if(!save_images) { export_arr = []; @@ -4710,6 +4718,22 @@ Current version: 111 export_arr.push(gametext_arr[i].replace(/\[<\|p\|.+?\|p\|>\]/g, "").replace(/\[<\|d\|.+?\|d\|>\]/g, "")); } } + else + { + //bake used image metas into savefile + for (let i = 0; i < gametext_arr.length; ++i) { + let matches = gametext_arr[i].match(/\[<\|d\|.+?\|d\|>\]/g); + for(let m in matches) + { + let inner = matches[m].substring(5, matches[m].length - 5); + let imghash = cyrb_hash(inner); + if (completed_imgs_meta[imghash] != null) { + export_hashes[imghash] = completed_imgs_meta[imghash]; + } + } + } + new_save_storyobj.completed_imgs_meta = export_hashes; + } if (export_arr.length > 0) { new_save_storyobj.prompt = export_arr[0]; @@ -5073,6 +5097,14 @@ Current version: 111 } } } + + if(storyobj.completed_imgs_meta) + { + for (var key in storyobj.completed_imgs_meta) + { + completed_imgs_meta[key] = storyobj.completed_imgs_meta[key]; + } + } } //port over old images to the new format @@ -6076,6 +6108,7 @@ Current version: 111 document.getElementById("zoomedimgcontainer").classList.contains("hidden") && document.getElementById("groupselectcontainer").classList.contains("hidden") && document.getElementById("imagestylecontainer").classList.contains("hidden") && + document.getElementById("addimgcontainer").classList.contains("hidden") && document.getElementById("advancedloadfile").classList.contains("hidden") ); } @@ -6096,6 +6129,7 @@ Current version: 111 document.getElementById("zoomedimgcontainer").classList.add("hidden"); document.getElementById("groupselectcontainer").classList.add("hidden"); document.getElementById("imagestylecontainer").classList.add("hidden"); + document.getElementById("addimgcontainer").classList.add("hidden"); document.getElementById("advancedloadfile").classList.add("hidden"); } @@ -8187,6 +8221,7 @@ Current version: 111 document.getElementById("cht_inp").value = ""; chat_resize_input(); image_db = {}; + interrogation_db = {}; completed_imgs_meta = {}; localsettings.adventure_is_action = false; prev_hl_chunk = null; @@ -8456,29 +8491,6 @@ Current version: 111 show_abort_button(false); } - var addimgLongPressTimer = null; - function btn_addimg_longpress_start() - { - addimgLongPressTimer = setTimeout(()=>{ - popup_manual_image(); - }, 2000); - } - function btn_addimg_longpress_end() - { - clearTimeout(addimgLongPressTimer); - } - function popup_manual_image() - { - inputBox("Tip: You can generate images manually by long-pressing the 'Add Img' button.\n\nEnter a prompt to generate an image with.","Generate Image Manually","","Enter a Prompt",()=>{ - let userinput = getInputBoxValue(); - if(userinput.trim()!="") - { - var sentence = userinput.trim().substring(0, 300); - do_manual_gen_image(sentence); - } - },false); - } - function do_manual_gen_image(sentence) { generate_new_image(sentence); @@ -8491,7 +8503,7 @@ Current version: 111 }, 10000); } - function manual_gen_image() { + function add_img_btn_auto() { let truncated_context = concat_gametext(true, ""); truncated_context = replace_placeholders(truncated_context); var tclen = truncated_context.length; @@ -8504,8 +8516,56 @@ Current version: 111 do_manual_gen_image(sentence); } }else{ - popup_manual_image(); + msgbox("Error: Your current story is blank.\nAdd some text, or try generating from custom prompt instead.","Story is Blank") } + document.getElementById("addimgcontainer").classList.add("hidden"); + } + + function add_img_btn_custom() + { + inputBox("Enter a custom prompt to generate an image with.","Generate Image Manually","","Enter a Prompt",()=>{ + let userinput = getInputBoxValue(); + if(userinput.trim()!="") + { + var sentence = userinput.trim().substring(0, 300); + do_manual_gen_image(sentence); + } + },false); + document.getElementById("addimgcontainer").classList.add("hidden"); + } + + function add_img_btn_upload() + { + let finput = document.getElementById('addimgfileinput'); + finput.click(); + finput.onchange = (event) => { + if (event.target.files.length > 0 && event.target.files[0]) { + const file = event.target.files[0]; + const reader = new FileReader(); + reader.onload = function(img) { + + let imgid = "selfuploadimg"+(Math.floor(10000 + Math.random() * 90000)).toString(); + let nimgtag = "[<|p|" + imgid + "|p|>]"; + gametext_arr.push(nimgtag); + image_db[imgid] = { done: false, queue: "Generating", result: "", prompt:"", local:true }; + + let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX; + compressImage(img.target.result, (newDataUri) => { + image_db[imgid].done = true; + image_db[imgid].result = newDataUri; + }, true, true, imgres,0.33,true); + + } + reader.readAsDataURL(file); + } + finput.value = ""; + }; + document.getElementById("addimgcontainer").classList.add("hidden"); + } + + function add_img_btn_menu() + { + document.getElementById("addimgcontainer").classList.remove("hidden"); } var xtts_is_connected = false; @@ -8788,7 +8848,7 @@ Current version: 111 } } - let truncated_context = concat_gametext(true, ""); //no need to truncate if memory is empty + let truncated_context = concat_gametext(true, "","","",false,true); //no need to truncate if memory is empty truncated_context = truncated_context.replace(/\xA0/g,' '); //replace non breaking space nbsp //this is a hack since we dont have a proper tokenizer, but we can estimate 1 token per 3 characters @@ -9810,7 +9870,7 @@ Current version: 111 //for now, append the new image directly into the gtarr let nimgtag = "[<|p|" + data.id + "|p|>]"; gametext_arr.push(nimgtag); - image_db[data.id] = { done: false, queue: "Starting", result: "", alt:sentence, local:false }; + image_db[data.id] = { done: false, queue: "Starting", result: "", prompt:sentence, local:false }; console.log("New image queued " + nimgtag); } else { @@ -9827,16 +9887,16 @@ Current version: 111 { let desired_model = document.getElementById("generate_images_local_model").value; genimg_payload.models = [desired_model]; - let imgid = "A111img"+(Math.floor(10000 + Math.random() * 90000)).toString(); + let imgid = "A1111img"+(Math.floor(10000 + Math.random() * 90000)).toString(); let nimgtag = "[<|p|" + imgid + "|p|>]"; gametext_arr.push(nimgtag); - image_db[imgid] = { done: false, queue: "Generating", result: "", alt:sentence, local:true }; + image_db[imgid] = { done: false, queue: "Generating", result: "", prompt:sentence, local:true }; generate_a1111_image(genimg_payload,(outputimg)=>{ if(outputimg) { //console.log(outputimg); let origImg = "data:image/jpeg;base64," + outputimg; - let imgres = localsettings.img_allowhd?400:256; + let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX; compressImage(origImg, (newDataUri) => { image_db[imgid].done = true; image_db[imgid].result = newDataUri; @@ -9858,13 +9918,13 @@ Current version: 111 let imgid = "DALLEimg"+(Math.floor(10000 + Math.random() * 90000)).toString(); let nimgtag = "[<|p|" + imgid + "|p|>]"; gametext_arr.push(nimgtag); - image_db[imgid] = { done: false, queue: "Generating", result: "", alt:sentence, local:true }; + image_db[imgid] = { done: false, queue: "Generating", result: "", prompt:sentence, local:true }; generate_dalle_image(genimg_payload,(outputimg)=>{ if(outputimg) { //console.log(outputimg); let origImg = "data:image/jpeg;base64," + outputimg; - let imgres = localsettings.img_allowhd?380:256; + let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX; compressImage(origImg, (newDataUri) => { image_db[imgid].done = true; image_db[imgid].result = newDataUri; @@ -9878,7 +9938,129 @@ Current version: 111 } } - function click_image(target) + function interrogate_new_image(base64img, imghash) + { + let parts = base64img.split(','); + if (parts.length === 2 && parts[0].startsWith('data:image')) { + base64img = parts[1]; + } + + if(localsettings.generate_images_mode==2) //a1111 + { + let payload = { + "image": base64img, + "model": "clip" + }; + let imgid = "A1111interrogate"+(Math.floor(10000 + Math.random() * 90000)).toString(); + fetch(localsettings.saved_a1111_url + a1111_interrogate_endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(payload), + }) + .then(x => x.json()) + .then(resp => { + console.log(resp); + if(resp && resp.caption) + { + let caption = resp.caption; + let savedmeta = completed_imgs_meta[imghash]; + if(caption && savedmeta) + { + savedmeta.desc = caption; + update_clicked_image(imghash); + } + } + }).catch((error) => { + console.log("Interrogate Error: " + error); + }); + } + else + { + //horde + let payload = { + "forms": [ + { + "name": "caption" + } + ], + "source_image": base64img + }; + fetch(stablehorde_submit_interrogate_endpoint, { + method: 'POST', // or 'PUT' + headers: { + 'Content-Type': 'application/json', + 'Client-Agent': default_client_agent, + 'apikey': localsettings.my_api_key, + }, + body: JSON.stringify(payload), + }) + .then((response) => response.json()) + .then((data) => { + console.log('interrogate img result:', data); + if (data.id && data.id != "") { + interrogation_db[data.id] = { done: false, result: "", imghash:imghash, local:false }; + console.log("New interrogate queued: " + data.id); + } + else { + //something went wrong. do nothing. + msgbox("Image interrogation failed: " + data.message); + } + }) + .catch((error) => { + console.error('Error:', error); + msgbox("Image interrogation error: " + error); + }); + } + + } + + function toggle_ai_vision(imghash) + { + let savedmeta = completed_imgs_meta[imghash]; + if(savedmeta) + { + savedmeta.enabled = !savedmeta.enabled; + if(!savedmeta.desc && savedmeta.enabled) + { + //request a new interrogation + var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash); + if(!alreadysent) + { + let b64 = document.getElementById("zoomedimg").src; + interrogate_new_image(b64,imghash); + } + } + update_clicked_image(imghash); + } + else + { + console.log("IMG META NOT FOUND!"); + } + + } + function update_clicked_image(imghash) + { + let savedmeta = completed_imgs_meta[imghash]; + if(!savedmeta) + { + document.getElementById("zoomedimgdesc").innerText = "No Saved Data"; + } + else + { + let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description"); + origprompt = escapeHtml(origprompt); + let visionstatus = (savedmeta.enabled?(savedmeta.desc?`Active`:`Analyzing...`):`Inactive`); + let togglebtn = (savedmeta.enabled?``:``); + document.getElementById("zoomedimgdesc").innerHTML = ` + AI Vision: `+visionstatus+` ?This allows the AI to visually recognize this image, to see and react to this image. Uses Horde or Local A1111 for image interrogation if enabled. + `+togglebtn+` +
+ `; + } + } + function click_image(target,imghash) { if(target) { @@ -9890,16 +10072,9 @@ Current version: 111 } document.getElementById("zoomedimgcontainer").classList.remove("hidden"); document.getElementById("zoomedimg").src = target.src; - let tmpdsc = target.title; - if(tmpdsc && tmpdsc!="") - { - tmpdsc = replaceAll(tmpdsc,"
"," "); - document.getElementById("zoomedimgdesc").innerText = tmpdsc; - } - else - { - document.getElementById("zoomedimgdesc").innerText = "No Saved Description"; - } + + update_clicked_image(imghash); + } } function delete_curr_image() @@ -9929,7 +10104,7 @@ Current version: 111 let waittime = "Unavailable"; if (image_db[pend_txt] != null) { let qq = image_db[pend_txt].queue; - alttxt = image_db[pend_txt].alt?escapeHtml(image_db[pend_txt].alt):""; + alttxt = image_db[pend_txt].prompt?escapeHtml(image_db[pend_txt].prompt):""; waittime = (qq == 0 ? "Generating" : (qq=="Starting"?qq:"Queue: " + qq)); } else { console.log("Cannot render " + pend_txt); @@ -9937,11 +10112,11 @@ Current version: 111 return `
` + pend_txt + `
` + waittime + `
`; } else { - let imghash = cyrb_hash(data); + let imghash = cyrb_hash(data).trim(); if (completed_imgs_meta[imghash] != null) { - alttxt = completed_imgs_meta[imghash].alt?escapeHtml(completed_imgs_meta[imghash].alt):""; + alttxt = completed_imgs_meta[imghash].prompt?escapeHtml(completed_imgs_meta[imghash].prompt):""; } - return `
`; + return `
`; } } @@ -10158,7 +10333,59 @@ Current version: 111 document.getElementById("lastreq2").innerHTML = lastreq; } + function poll_interrogation_db() + { + let imagecount = Object.keys(interrogation_db).length; + if (!imagecount) return; + + console.log("polling for pending interrogations " + imagecount); + for (let key in interrogation_db) { + let img = interrogation_db[key]; + if (img.done == false && !img.local) { + //call check + fetch(stablehorde_output_interrogate_endpoint + "/" + key) + .then(x => x.json()) + .then((data) => { + console.log('pollimg result:', data); + if (!data.state || (data.state!="processing" && data.state!="done")) { + msgbox("Pending image interrogation could not complete."); + console.log("removing from interrogation: " + key); + delete interrogation_db[key]; + } + else if (data.state == "done") { + //fetch final image + img.done = true; + //save results + if(data.forms && data.forms.length>0 && data.forms[0].result && data.forms[0].result.caption) + { + let caption = data.forms[0].result.caption; + let savedmeta = completed_imgs_meta[img.imghash]; + if(caption && savedmeta) + { + savedmeta.desc = caption; + update_clicked_image(img.imghash); + } + } + + delete interrogation_db[key]; + } + else { + //do nothing + } + }) + .catch((error) => { + console.error('Error:', error); + msgbox("Interrogate poll error: " + error); + delete interrogation_db[key]; + }); + } + } + } + function poll_image_db() { + + poll_interrogation_db(); + //every time this runs, we loop through our image cache for unfinished images and poll for a response //console.log("polling for pending images: " + JSON.stringify(image_db)); let imagecount = Object.keys(image_db).length; @@ -10194,7 +10421,7 @@ Current version: 111 img.queue = 0; let origImg = "data:image/jpeg;base64," + finalimg.generations[0].img; //console.log("Original image: " + origImg); - let imgres = localsettings.img_allowhd?380:256; + let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX; compressImage(origImg, (newDataUri) => { img.result = newDataUri; }, true, true, imgres); } }) @@ -10233,7 +10460,7 @@ Current version: 111 let newstr = "[<|d|" + img.result + "|d|>]"; console.log("Replacing with Image: " + matchstr); gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr); - completed_imgs_meta[cyrb_hash(img.result)] = {alt:image_db[key].alt}; + completed_imgs_meta[cyrb_hash(img.result)] = {prompt:image_db[key].prompt, desc:"", enabled:false}; delete image_db[key]; } } @@ -10246,7 +10473,7 @@ Current version: 111 } } - function compressImage(inputDataUri, onDone, isJpeg=true, fixedSize=true, maxSize=256, quality = 0.33) { + function compressImage(inputDataUri, onDone, isJpeg=true, fixedSize=true, maxSize=NO_HD_RES_PX, quality = 0.33, forceCrop=false) { let img = document.createElement('img'); let wantedWidth = maxSize; let wantedHeight = maxSize; @@ -10288,7 +10515,15 @@ Current version: 111 canvas.height = wantedHeight; // We resize the image with the canvas method - ctx.drawImage(this, 0, 0, wantedWidth, wantedHeight); + if(forceCrop) + { + let minsize = Math.min(origW, origH); + let mx = (origW - minsize) / 2; + let my = (origH - minsize) / 2; + ctx.drawImage(this, mx, my, minsize, minsize, 0, 0, wantedWidth, wantedHeight); + }else{ + ctx.drawImage(this, 0, 0, wantedWidth, wantedHeight); + } var dataURI = ""; if(isJpeg) @@ -10600,7 +10835,10 @@ Current version: 111 gametext_elem.querySelectorAll('div.storyimg,div.storyimgfloat').forEach( (el) => { let chimg = el.getElementsByTagName("img")[0]; - el.replaceWith((chimg.alt == null || chimg.alt == "") ? ("[<|d|" + chimg.src + "|d|>]") : ("[<|p|" + chimg.alt + "|p|>]")) + if(el && chimg) + { + el.replaceWith((chimg.alt == null || chimg.alt == "") ? ("[<|d|" + chimg.src + "|d|>]") : ("[<|p|" + chimg.alt + "|p|>]")) + } } ); @@ -10678,7 +10916,7 @@ Current version: 111 } } - function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false) { + function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false,insertAIVision=false) { let fulltxt = ""; for (let i = 0; i < gametext_arr.length; ++i) { let extracted = (gametext_arr[i]); @@ -10729,7 +10967,21 @@ Current version: 111 fulltxt = replaceAll(fulltxt,b,get_instruct_endtag(false)); } } - if (stripimg) { + if (stripimg) + { + if(insertAIVision) + { + fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) { + // m here means the whole matched string + let inner = m.substring(5, m.length - 5); + let imghash = cyrb_hash(inner); + let foundmeta = completed_imgs_meta[imghash]; + if (foundmeta != null && foundmeta.enabled && foundmeta.desc) { + return "\n(Attached Image: " + foundmeta.desc + ")\n"; + } + return ""; + }); + } fulltxt = fulltxt.replace(/\[<\|p\|.+?\|p\|>\]/g, stripimg_replace_str); fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, stripimg_replace_str); @@ -12274,7 +12526,7 @@ Current version: 111 for (var i = 0; i < blocks.length; i++) { if (blocks[i].startsWith('```')) { blocks[i] = blocks[i].replace(/```[\s\S]*?\n([\s\S]*?)```/g, - function (m,m2) {return `

${m2.replace(/[“”]/g, "\"")}

`}); + function (m,m2) {return `

${m2.replace(/[“”]/g, "\"")}

`}); } else { blocks[i] = blocks[i].replaceAll('```', '`').replaceAll('``', '`').replace(/`(.*?)`/g, function (m,m2) {return `${m2.replace(/[“”]/g, "\"")}`;}); //remove fancy quotes too @@ -12422,7 +12674,7 @@ Current version: 111 - +

@@ -12469,7 +12721,7 @@ Current version: 111 - +
@@ -13575,6 +13827,7 @@ Current version: 111
Loading...
+
@@ -13659,6 +13912,29 @@ Current version: 111
+ +