fixed img2img

2025-09-15 19:39:42 +00:00 · 2024-04-06 17:29:44 +08:00 · 2024-04-06 17:29:44 +08:00 · 743687020d
commit 743687020d
parent 8131616454
4 changed files with 103 additions and 22 deletions
--- a/expose.h
+++ b/expose.h
@ -117,6 +117,8 @@ struct sd_generation_inputs
 {
    const char * prompt;
    const char * negative_prompt;
+    const char * init_images = "";
+    const float denoising_strength;
    const float cfg_scale;
    const int sample_steps;
    const int width;
--- a/klite.embd
+++ b/klite.embd
@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
 Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
-Current version: 126
+Current version: 127
 -Concedo
 -->

@ -197,8 +197,10 @@ Current version: 126
 			margin-top: 6px;
 		}

-		#actionmenuitems button {
-			width: 80px;
+		#actionmenuitems button,#actionmenuitems2 button {
+			width: 60px;
+			padding: 4px 4px;
+			font-size: 12px;
 		}

 		#messagefield {
@ -1873,13 +1875,13 @@ Current version: 126
 	{
 		height: calc(98vh - 240px);
 	}
-	@media (max-width: 720px) {
+	@media (max-width: 598px) {
 		.normal_viewport_height
 		{
 			height: calc(98vh - 270px);
 		}
 	}
-	@media (max-width: 406px) {
+	@media (max-width: 342px) {
 		.normal_viewport_height
 		{
 			height: calc(98vh - 300px);
@ -1903,7 +1905,7 @@ Current version: 126
 	}
 	.aesthetic_viewport_height.withmenu
 	{
-		height: calc(98vh - 206px);
+		height: calc(98vh - 198px);
 	}
 	.aesthetic_viewport_height.withtyping
 	{
@ -1911,7 +1913,7 @@ Current version: 126
 	}
 	.aesthetic_viewport_height.withmenu.withtyping
 	{
-		height: calc(98vh - 256px);
+		height: calc(98vh - 248px);
 	}

 	/**
@ -4263,7 +4265,7 @@ Current version: 126
 		}

 		let ep = a1111_txt2img_endpoint;
-		if(req_payload.source_image!="")
+		if(req_payload.source_image && req_payload.source_image!="")
 		{
 			ep = a1111_img2img_endpoint;
 			a1111_t2i_payload.init_images = [req_payload.source_image];
@ -7475,6 +7477,8 @@ Current version: 126
 		});
 	}

+	var cached_model_list = null;
+	var stale_cached_model_time = performance.now();
 	function fetch_models(onDoneCallback)
 	{
 		if(localflag)
@ -7482,6 +7486,14 @@ Current version: 126
 			onDoneCallback(selected_models);
 			return;
 		}
+
+		if(cached_model_list!=null && cached_model_list.length>1 && performance.now() < stale_cached_model_time)
+		{
+			console.log("Reuse cached model list");
+			onDoneCallback(cached_model_list);
+			return;
+		}
+
 		//fetch the model list
 		multifetch(models_endpoints,(resArr,errArr)=>{
 			if(resArr && resArr.length>0)
@ -7500,6 +7512,8 @@ Current version: 126
 					}
 				}

+				cached_model_list = mdls;
+				stale_cached_model_time = performance.now() + 30000; //cache model list for 30s (performance.now() is in milliseconds)
 				onDoneCallback(mdls);
 			}
 			else
@ -10524,7 +10538,7 @@ Current version: 126
 			"replacement_filter": true,
 			"r2": false
 		}
-		if(base64img!="")
+		if(base64img!=null && base64img!="")
 		{
 			genimg_payload["source_image"] = base64img;
 			genimg_payload["params"]["denoising_strength"] = localsettings.img_img2imgstr;
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -108,6 +108,8 @@ class sd_load_model_inputs(ctypes.Structure):
 class sd_generation_inputs(ctypes.Structure):
    _fields_ = [("prompt", ctypes.c_char_p),
                ("negative_prompt", ctypes.c_char_p),
+                ("init_images", ctypes.c_char_p),
+                ("denoising_strength", ctypes.c_float),
                ("cfg_scale", ctypes.c_float),
                ("sample_steps", ctypes.c_int),
                ("width", ctypes.c_int),
@ -527,6 +529,9 @@ def sd_generate(genparams):
    global maxctx, args, currentusergenkey, totalgens, pendingabortkey
    prompt = genparams.get("prompt", "high quality")
    negative_prompt = genparams.get("negative_prompt", "")
+    init_images_arr = genparams.get("init_images", [])
+    init_images = ("" if (not init_images_arr or len(init_images_arr)==0 or not init_images_arr[0]) else init_images_arr[0])
+    denoising_strength = genparams.get("denoising_strength", 0.6)
    cfg_scale = genparams.get("cfg_scale", 5)
    sample_steps = genparams.get("steps", 20)
    width = genparams.get("width", 512)
@ -535,7 +540,6 @@ def sd_generate(genparams):
    sample_method = genparams.get("sampler_name", "k_euler_a")
    is_quiet = True if args.quiet else False

-
    #clean vars
    width = width - (width%64)
    height = height - (height%64)
@ -569,7 +573,9 @@ def sd_generate(genparams):
    inputs = sd_generation_inputs()
    inputs.prompt = prompt.encode("UTF-8")
    inputs.negative_prompt = negative_prompt.encode("UTF-8")
+    inputs.init_images = init_images.encode("UTF-8")
    inputs.cfg_scale = cfg_scale
+    inputs.denoising_strength = denoising_strength
    inputs.sample_steps = sample_steps
    inputs.width = width
    inputs.height = height
@ -1219,7 +1225,7 @@ Enter Prompt:<br>
            sse_stream_flag = False

            api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate
-            is_txt2img = False
+            is_imggen = False

            if self.path.endswith('/request'):
                api_format = 1
@ -1249,14 +1255,14 @@ Enter Prompt:<br>
                    return
                api_format = 5

-            if self.path.endswith('/sdapi/v1/txt2img'):
-                is_txt2img = True
+            if self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):
+                is_imggen = True

-            if is_txt2img or api_format > 0:
+            if is_imggen or api_format > 0:
                global last_req_time
                last_req_time = time.time()

-                if not is_txt2img and api_format<5:
+                if not is_imggen and api_format<5:
                    if not self.secure_endpoint():
                        return

@ -1297,7 +1303,7 @@ Enter Prompt:<br>
                        time.sleep(0.2) #short delay
                    return

-                elif is_txt2img: #image gen
+                elif is_imggen: #image gen
                    try:
                        gen = sd_generate(genparams)
                        genresp = (json.dumps({"images":[gen],"parameters":{},"info":""}).encode())
--- a/otherarch/sdcpp/sdtype_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@ -18,6 +18,8 @@
 #include "model.cpp"
 #include "zip.c"

+#include "otherarch/utils.h"
+
 // #include "preprocessing.hpp"
 #include "stable-diffusion.h"

@ -264,6 +266,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
    //sanitize prompts, remove quotes and limit lengths
    std::string cleanprompt = clean_input_prompt(inputs.prompt);
    std::string cleannegprompt = clean_input_prompt(inputs.negative_prompt);
+    std::string img2img_data = std::string(inputs.init_images);

    sd_params->prompt = cleanprompt;
    sd_params->negative_prompt = cleannegprompt;
@ -272,6 +275,13 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
    sd_params->seed = inputs.seed;
    sd_params->width = inputs.width;
    sd_params->height = inputs.height;
+    sd_params->strength = inputs.denoising_strength;
+    sd_params->mode = (img2img_data==""?SDMode::TXT2IMG:SDMode::IMG2IMG);
+
+    //for img2img
+    sd_image_t input_image = {0,0,0,nullptr};
+    std::vector<uint8_t> image_buffer;
+    int nx, ny, nc;

    if(!is_quiet)
    {
@ -316,7 +326,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)

        if(!is_quiet && sddebugmode==1)
        {
-            printf("\nPROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nCSTR:%f\n\n",
+            printf("\nTXT2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nCSTR:%f\n\n",
            sd_params->prompt.c_str(),
            sd_params->negative_prompt.c_str(),
            sd_params->clip_skip,
@ -344,10 +354,59 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
                          control_image,
                          sd_params->control_strength);
    } else {
-        sd_image_t input_image = {(uint32_t)sd_params->width,
-                                  (uint32_t)sd_params->height,
-                                  3,
-                                  input_image_buffer};
+
+        if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) {
+            printf("\nKCPP SD: bad request image dimensions!\n");
+            output.data = "";
+            output.status = 0;
+            return output;
+        }
+
+        image_buffer = kcpp_base64_decode(img2img_data);
+
+        if(input_image_buffer!=nullptr) //just in time free old buffer
+        {
+             stbi_image_free(input_image_buffer);
+             input_image_buffer = nullptr;
+        }
+
+        input_image_buffer = stbi_load_from_memory(image_buffer.data(), image_buffer.size(), &nx, &ny, &nc, 3);
+
+        if (nx <= 0 || nx % 64 != 0 || ny <= 0 || ny % 64 != 0 || nc!= 3) {
+            printf("\nKCPP SD: bad input image dimensions!\n");
+            output.data = "";
+            output.status = 0;
+            return output;
+        }
+        if (!input_image_buffer) {
+            printf("\nKCPP SD: load image from memory failed!\n");
+            output.data = "";
+            output.status = 0;
+            return output;
+        }
+
+        input_image.width = nx;
+        input_image.height = ny;
+        input_image.channel = nc;
+        input_image.data = input_image_buffer;
+
+        if(!is_quiet && sddebugmode==1)
+        {
+            printf("\nIMG2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%d\nSTR:%f\n\n",
+            sd_params->prompt.c_str(),
+            sd_params->negative_prompt.c_str(),
+            sd_params->clip_skip,
+            sd_params->cfg_scale,
+            sd_params->width,
+            sd_params->height,
+            sd_params->sample_method,
+            sd_params->sample_steps,
+            sd_params->seed,
+            sd_params->batch_count,
+            control_image,
+            sd_params->strength);
+        }
+
        results = img2img(sd_ctx,
                            input_image,
                            sd_params->prompt.c_str(),