From 1ae0a524ad024ac9060f2445054cfe15bf0482aa Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 15 Feb 2024 10:51:41 +0800
Subject: [PATCH] added ai vision
---
klite.embd | 392 +++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 334 insertions(+), 58 deletions(-)
diff --git a/klite.embd b/klite.embd
index a0199ad3a..4bf6f7f47 100644
--- a/klite.embd
+++ b/klite.embd
@@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
-Current version: 111
+Current version: 112
-Concedo
-->
@@ -1509,6 +1509,7 @@ Current version: 111
max-height: 120px;
overflow-y: auto;
overflow-x: hidden;
+ font-size: 12px;
}
.mdlpicker::-webkit-calendar-picker-indicator {
opacity: 100;
@@ -3294,6 +3295,8 @@ Current version: 111
const stablehorde_poll_endpoint = stablehorde_url + "/api/v2/generate/check";
const stablehorde_output_endpoint = stablehorde_url + "/api/v2/generate/status";
const stablehorde_model_endpoint = stablehorde_url + "/api/v2/status/models";
+ const stablehorde_submit_interrogate_endpoint = stablehorde_url + "/api/v2/interrogate/async";
+ const stablehorde_output_interrogate_endpoint = stablehorde_url + "/api/v2/interrogate/status";
const kobold_custom_gen_endpoint = "/api/v1/generate";
const kobold_custom_gen_stream_endpoint = "/api/extra/generate/stream";
@@ -3327,6 +3330,7 @@ Current version: 111
const a1111_models_endpoint = "/sdapi/v1/sd-models";
const a1111_options_endpoint = "/sdapi/v1/options";
const a1111_txt2img_endpoint = "/sdapi/v1/txt2img";
+ const a1111_interrogate_endpoint = "/sdapi/v1/interrogate";
const xtts_gen_endpoint = "/tts_to_audio/";
const xtts_voices_endpoint = "/speakers_list";
@@ -3345,6 +3349,8 @@ Current version: 111
const default_xtts_base = " http://localhost:8020";
const XTTS_ID = 1000;
+ const HD_RES_PX = 400;
+ const NO_HD_RES_PX = 256;
//all configurable globals
var perfdata = null; //if it's null, we are not connected
@@ -3379,6 +3385,7 @@ Current version: 111
var generateimagesinterval = 650; //if generated images is enabled, it will trigger after every 600 new characters in context.
var nextgeneratedimagemilestone = generateimagesinterval; //used to keep track of when to generate the next image
var image_db = {}; //stores a dictionary of pending images
+ var interrogation_db = {};
var completed_imgs_meta = {}; //stores temp info on completed images like alt text
//key is ID, body is {done:false,queue:10,result:""}
var stablemodels = [{"name": "stable_diffusion","count": 1}]; //stored as {name,count}
@@ -4703,6 +4710,7 @@ Current version: 111
let new_save_storyobj = generate_base_storyobj();
let export_arr = gametext_arr;
+ let export_hashes = {};
if(!save_images)
{
export_arr = [];
@@ -4710,6 +4718,22 @@ Current version: 111
export_arr.push(gametext_arr[i].replace(/\[<\|p\|.+?\|p\|>\]/g, "").replace(/\[<\|d\|.+?\|d\|>\]/g, ""));
}
}
+ else
+ {
+ //bake used image metas into savefile
+ for (let i = 0; i < gametext_arr.length; ++i) {
+ let matches = gametext_arr[i].match(/\[<\|d\|.+?\|d\|>\]/g);
+ for(let m in matches)
+ {
+ let inner = matches[m].substring(5, matches[m].length - 5);
+ let imghash = cyrb_hash(inner);
+ if (completed_imgs_meta[imghash] != null) {
+ export_hashes[imghash] = completed_imgs_meta[imghash];
+ }
+ }
+ }
+ new_save_storyobj.completed_imgs_meta = export_hashes;
+ }
if (export_arr.length > 0) {
new_save_storyobj.prompt = export_arr[0];
@@ -5073,6 +5097,14 @@ Current version: 111
}
}
}
+
+ if(storyobj.completed_imgs_meta)
+ {
+ for (var key in storyobj.completed_imgs_meta)
+ {
+ completed_imgs_meta[key] = storyobj.completed_imgs_meta[key];
+ }
+ }
}
//port over old images to the new format
@@ -6076,6 +6108,7 @@ Current version: 111
document.getElementById("zoomedimgcontainer").classList.contains("hidden") &&
document.getElementById("groupselectcontainer").classList.contains("hidden") &&
document.getElementById("imagestylecontainer").classList.contains("hidden") &&
+ document.getElementById("addimgcontainer").classList.contains("hidden") &&
document.getElementById("advancedloadfile").classList.contains("hidden")
);
}
@@ -6096,6 +6129,7 @@ Current version: 111
document.getElementById("zoomedimgcontainer").classList.add("hidden");
document.getElementById("groupselectcontainer").classList.add("hidden");
document.getElementById("imagestylecontainer").classList.add("hidden");
+ document.getElementById("addimgcontainer").classList.add("hidden");
document.getElementById("advancedloadfile").classList.add("hidden");
}
@@ -8187,6 +8221,7 @@ Current version: 111
document.getElementById("cht_inp").value = "";
chat_resize_input();
image_db = {};
+ interrogation_db = {};
completed_imgs_meta = {};
localsettings.adventure_is_action = false;
prev_hl_chunk = null;
@@ -8456,29 +8491,6 @@ Current version: 111
show_abort_button(false);
}
- var addimgLongPressTimer = null;
- function btn_addimg_longpress_start()
- {
- addimgLongPressTimer = setTimeout(()=>{
- popup_manual_image();
- }, 2000);
- }
- function btn_addimg_longpress_end()
- {
- clearTimeout(addimgLongPressTimer);
- }
- function popup_manual_image()
- {
- inputBox("Tip: You can generate images manually by long-pressing the 'Add Img' button.\n\nEnter a prompt to generate an image with.","Generate Image Manually","","Enter a Prompt",()=>{
- let userinput = getInputBoxValue();
- if(userinput.trim()!="")
- {
- var sentence = userinput.trim().substring(0, 300);
- do_manual_gen_image(sentence);
- }
- },false);
- }
-
function do_manual_gen_image(sentence)
{
generate_new_image(sentence);
@@ -8491,7 +8503,7 @@ Current version: 111
}, 10000);
}
- function manual_gen_image() {
+ function add_img_btn_auto() {
let truncated_context = concat_gametext(true, "");
truncated_context = replace_placeholders(truncated_context);
var tclen = truncated_context.length;
@@ -8504,8 +8516,56 @@ Current version: 111
do_manual_gen_image(sentence);
}
}else{
- popup_manual_image();
+ msgbox("Error: Your current story is blank.\nAdd some text, or try generating from custom prompt instead.","Story is Blank")
}
+ document.getElementById("addimgcontainer").classList.add("hidden");
+ }
+
+ function add_img_btn_custom() //asks the user for a custom image prompt via inputBox, then hides the add-image menu
+ {
+ inputBox("Enter a custom prompt to generate an image with.","Generate Image Manually","","Enter a Prompt",()=>{ //callback passed to inputBox; presumably invoked when the user confirms - TODO confirm against inputBox
+ let userinput = getInputBoxValue();
+ if(userinput.trim()!="") //blank or whitespace-only input is ignored
+ {
+ var sentence = userinput.trim().substring(0, 300); //trimmed prompt, capped at 300 characters
+ do_manual_gen_image(sentence);
+ }
+ },false);
+ document.getElementById("addimgcontainer").classList.add("hidden"); //hide the add-image menu container
+ }
+
+ function add_img_btn_upload() //lets the user pick a local image file, compresses it, and appends it to the story as a new image entry
+ {
+ let finput = document.getElementById('addimgfileinput');
+ finput.click(); //programmatic click on the file input (presumably opens the browser's file picker)
+ finput.onchange = (event) => { //NOTE(review): handler is assigned after click(); this relies on the change event firing later, once a file has been chosen
+ if (event.target.files.length > 0 && event.target.files[0]) {
+ const file = event.target.files[0]; //only the first selected file is used
+ const reader = new FileReader();
+ reader.onload = function(img) { //img.target.result is the data URL produced by readAsDataURL below
+
+ let imgid = "selfuploadimg"+(Math.floor(10000 + Math.random() * 90000)).toString(); //id suffix is a random integer from 10000 to 99999
+ let nimgtag = "[<|p|" + imgid + "|p|>]"; //same [<|p|id|p|>] tag format used when queueing generated images
+ gametext_arr.push(nimgtag);
+ image_db[imgid] = { done: false, queue: "Generating", result: "", prompt:"", local:true }; //pending entry with an empty prompt; completed in the compressImage callback
+
+ let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX; //HD_RES_PX when img_allowhd is set, otherwise NO_HD_RES_PX
+ compressImage(img.target.result, (newDataUri) => {
+ image_db[imgid].done = true; //mark the entry finished and store the compressed data URI
+ image_db[imgid].result = newDataUri;
+ }, true, true, imgres,0.33,true); //NOTE(review): meaning of the remaining compressImage arguments is not visible here - confirm against its definition
+
+ }
+ reader.readAsDataURL(file);
+ }
+ finput.value = ""; //clear the input after every change event (presumably so the same file can be picked again - TODO confirm)
+ };
+ document.getElementById("addimgcontainer").classList.add("hidden"); //hide the add-image menu container
+ }
+
+ function add_img_btn_menu() //shows the add-image menu by removing the "hidden" class from its container
+ {
+ document.getElementById("addimgcontainer").classList.remove("hidden");
}
var xtts_is_connected = false;
@@ -8788,7 +8848,7 @@ Current version: 111
}
}
- let truncated_context = concat_gametext(true, ""); //no need to truncate if memory is empty
+ let truncated_context = concat_gametext(true, "","","",false,true); //no need to truncate if memory is empty
truncated_context = truncated_context.replace(/\xA0/g,' '); //replace non breaking space nbsp
//this is a hack since we dont have a proper tokenizer, but we can estimate 1 token per 3 characters
@@ -9810,7 +9870,7 @@ Current version: 111
//for now, append the new image directly into the gtarr
let nimgtag = "[<|p|" + data.id + "|p|>]";
gametext_arr.push(nimgtag);
- image_db[data.id] = { done: false, queue: "Starting", result: "", alt:sentence, local:false };
+ image_db[data.id] = { done: false, queue: "Starting", result: "", prompt:sentence, local:false };
console.log("New image queued " + nimgtag);
}
else {
@@ -9827,16 +9887,16 @@ Current version: 111
{
let desired_model = document.getElementById("generate_images_local_model").value;
genimg_payload.models = [desired_model];
- let imgid = "A111img"+(Math.floor(10000 + Math.random() * 90000)).toString();
+ let imgid = "A1111img"+(Math.floor(10000 + Math.random() * 90000)).toString();
let nimgtag = "[<|p|" + imgid + "|p|>]";
gametext_arr.push(nimgtag);
- image_db[imgid] = { done: false, queue: "Generating", result: "", alt:sentence, local:true };
+ image_db[imgid] = { done: false, queue: "Generating", result: "", prompt:sentence, local:true };
generate_a1111_image(genimg_payload,(outputimg)=>{
if(outputimg)
{
//console.log(outputimg);
let origImg = "data:image/jpeg;base64," + outputimg;
- let imgres = localsettings.img_allowhd?400:256;
+ let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX;
compressImage(origImg, (newDataUri) => {
image_db[imgid].done = true;
image_db[imgid].result = newDataUri;
@@ -9858,13 +9918,13 @@ Current version: 111
let imgid = "DALLEimg"+(Math.floor(10000 + Math.random() * 90000)).toString();
let nimgtag = "[<|p|" + imgid + "|p|>]";
gametext_arr.push(nimgtag);
- image_db[imgid] = { done: false, queue: "Generating", result: "", alt:sentence, local:true };
+ image_db[imgid] = { done: false, queue: "Generating", result: "", prompt:sentence, local:true };
generate_dalle_image(genimg_payload,(outputimg)=>{
if(outputimg)
{
//console.log(outputimg);
let origImg = "data:image/jpeg;base64," + outputimg;
- let imgres = localsettings.img_allowhd?380:256;
+ let imgres = localsettings.img_allowhd?HD_RES_PX:NO_HD_RES_PX;
compressImage(origImg, (newDataUri) => {
image_db[imgid].done = true;
image_db[imgid].result = newDataUri;
@@ -9878,7 +9938,129 @@ Current version: 111
}
}
- function click_image(target)
+ function interrogate_new_image(base64img, imghash) //requests a caption for an image; base64img may be a data URI or raw base64, imghash is the key into completed_imgs_meta
+ {
+ let parts = base64img.split(',');
+ if (parts.length === 2 && parts[0].startsWith('data:image')) { //strip a leading "data:image...," header so only the part after the comma is sent
+ base64img = parts[1];
+ }
+
+ if(localsettings.generate_images_mode==2) //a1111
+ {
+ let payload = {
+ "image": base64img,
+ "model": "clip"
+ };
+ let imgid = "A1111interrogate"+(Math.floor(10000 + Math.random() * 90000)).toString(); //NOTE(review): imgid is never used in this function, and this branch adds nothing to interrogation_db
+ fetch(localsettings.saved_a1111_url + a1111_interrogate_endpoint, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify(payload),
+ })
+ .then(x => x.json())
+ .then(resp => {
+ console.log(resp);
+ if(resp && resp.caption) //responses without a caption are silently ignored
+ {
+ let caption = resp.caption;
+ let savedmeta = completed_imgs_meta[imghash];
+ if(caption && savedmeta) //only store the caption if metadata for this image still exists
+ {
+ savedmeta.desc = caption;
+ update_clicked_image(imghash); //refresh the zoomed image description panel
+ }
+ }
+ }).catch((error) => {
+ console.log("Interrogate Error: " + error); //a1111 failures are only logged; unlike the horde branch no msgbox is shown
+ });
+ }
+ else
+ {
+ //horde
+ let payload = {
+ "forms": [
+ {
+ "name": "caption"
+ }
+ ],
+ "source_image": base64img
+ };
+ fetch(stablehorde_submit_interrogate_endpoint, {
+ method: 'POST', // or 'PUT'
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Client-Agent': default_client_agent,
+ 'apikey': localsettings.my_api_key,
+ },
+ body: JSON.stringify(payload),
+ })
+ .then((response) => response.json())
+ .then((data) => {
+ console.log('interrogate img result:', data);
+ if (data.id && data.id != "") {
+ interrogation_db[data.id] = { done: false, result: "", imghash:imghash, local:false }; //pending entry keyed by the returned id; the result is presumably collected elsewhere - not visible here
+ console.log("New interrogate queued: " + data.id);
+ }
+ else {
+ //no request id was returned, so nothing is queued. report the message from the response.
+ msgbox("Image interrogation failed: " + data.message);
+ }
+ })
+ .catch((error) => {
+ console.error('Error:', error);
+ msgbox("Image interrogation error: " + error);
+ });
+ }
+
+ }
+
+ function toggle_ai_vision(imghash) //flips the "enabled" flag on an image's saved metadata and requests a caption if one is needed
+ {
+ let savedmeta = completed_imgs_meta[imghash];
+ if(savedmeta)
+ {
+ savedmeta.enabled = !savedmeta.enabled;
+ if(!savedmeta.desc && savedmeta.enabled) //just enabled and no description stored yet
+ {
+ //request a new interrogation
+ var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash); //true if any interrogation_db entry already refers to this image
+ if(!alreadysent)
+ {
+ let b64 = document.getElementById("zoomedimg").src; //image data is taken from the zoomed image element's src
+ interrogate_new_image(b64,imghash);
+ }
+ }
+ update_clicked_image(imghash); //refresh the zoomed image description panel
+ }
+ else
+ {
+ console.log("IMG META NOT FOUND!"); //no metadata stored under this hash, so nothing is toggled
+ }
+
+ }
+ function update_clicked_image(imghash) //rewrites the "zoomedimgdesc" element from the metadata saved for imghash
+ {
+ let savedmeta = completed_imgs_meta[imghash];
+ if(!savedmeta)
+ {
+ document.getElementById("zoomedimgdesc").innerText = "No Saved Data";
+ }
+ else
+ {
+ let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description"); //newlines in the saved prompt are replaced with spaces
+ origprompt = escapeHtml(origprompt); //escaped before use; NOTE(review): origprompt is not referenced again in the lines shown here
+ let visionstatus = (savedmeta.enabled?(savedmeta.desc?`Active`:`Analyzing...`):`Inactive`); //enabled with desc = Active, enabled without desc = Analyzing..., otherwise Inactive
+ let togglebtn = (savedmeta.enabled?``:``); //NOTE(review): both branches are empty strings as shown; markup may have been lost from this copy of the patch - confirm
+ document.getElementById("zoomedimgdesc").innerHTML = `
+ AI Vision: `+visionstatus+` ?This allows the AI to visually recognize this image, to see and react to this image. Uses Horde or Local A1111 for image interrogation if enabled.
+ `+togglebtn+`
+
+ `;
+ }
+ }
+ function click_image(target,imghash)
{
if(target)
{
@@ -9890,16 +10072,9 @@ Current version: 111
}
document.getElementById("zoomedimgcontainer").classList.remove("hidden");
document.getElementById("zoomedimg").src = target.src;
- let tmpdsc = target.title;
- if(tmpdsc && tmpdsc!="")
- {
- tmpdsc = replaceAll(tmpdsc,"
"," ");
- document.getElementById("zoomedimgdesc").innerText = tmpdsc;
- }
- else
- {
- document.getElementById("zoomedimgdesc").innerText = "No Saved Description";
- }
+
+ update_clicked_image(imghash);
+
}
}
function delete_curr_image()
@@ -9929,7 +10104,7 @@ Current version: 111
let waittime = "Unavailable";
if (image_db[pend_txt] != null) {
let qq = image_db[pend_txt].queue;
- alttxt = image_db[pend_txt].alt?escapeHtml(image_db[pend_txt].alt):"";
+ alttxt = image_db[pend_txt].prompt?escapeHtml(image_db[pend_txt].prompt):"";
waittime = (qq == 0 ? "Generating" : (qq=="Starting"?qq:"Queue: " + qq));
} else {
console.log("Cannot render " + pend_txt);
@@ -9937,11 +10112,11 @@ Current version: 111
return `
${m2.replace(/[“”]/g, "\"")}`}); + function (m,m2) {return `
${m2.replace(/[“”]/g, "\"")}`});
}
else {
blocks[i] = blocks[i].replaceAll('```', '`').replaceAll('``', '`').replace(/`(.*?)`/g, function (m,m2) {return `${m2.replace(/[“”]/g, "\"")}`;}); //remove fancy quotes too
@@ -12422,7 +12674,7 @@ Current version: 111
-
+