Now accept multiple reference images

This commit is contained in:
Concedo 2025-06-28 17:30:28 +08:00
parent 2e14338455
commit 4ec0e0fd21
6 changed files with 128 additions and 76 deletions

View file

@ -181,7 +181,8 @@ struct sd_generation_inputs
const char * negative_prompt = nullptr; const char * negative_prompt = nullptr;
const char * init_images = ""; const char * init_images = "";
const char * mask = ""; const char * mask = "";
const char * extra_image = ""; const int extra_images_len = 0;
const char ** extra_images = nullptr;
const bool flip_mask = false; const bool flip_mask = false;
const float denoising_strength = 0.0f; const float denoising_strength = 0.0f;
const float cfg_scale = 0.0f; const float cfg_scale = 0.0f;

File diff suppressed because one or more lines are too long

View file

@ -59,6 +59,7 @@ stop_token_max = 256
ban_token_max = 768 ban_token_max = 768
logit_bias_max = 512 logit_bias_max = 512
dry_seq_break_max = 128 dry_seq_break_max = 128
extra_images_max = 4
# global vars # global vars
KcppVersion = "1.94.2" KcppVersion = "1.94.2"
@ -291,7 +292,8 @@ class sd_generation_inputs(ctypes.Structure):
("negative_prompt", ctypes.c_char_p), ("negative_prompt", ctypes.c_char_p),
("init_images", ctypes.c_char_p), ("init_images", ctypes.c_char_p),
("mask", ctypes.c_char_p), ("mask", ctypes.c_char_p),
("extra_image", ctypes.c_char_p), ("extra_images_len", ctypes.c_int),
("extra_images", ctypes.POINTER(ctypes.c_char_p)),
("flip_mask", ctypes.c_bool), ("flip_mask", ctypes.c_bool),
("denoising_strength", ctypes.c_float), ("denoising_strength", ctypes.c_float),
("cfg_scale", ctypes.c_float), ("cfg_scale", ctypes.c_float),
@ -1714,7 +1716,9 @@ def sd_generate(genparams):
seed = random.randint(100000, 999999) seed = random.randint(100000, 999999)
sample_method = genparams.get("sampler_name", "k_euler_a") sample_method = genparams.get("sampler_name", "k_euler_a")
clip_skip = tryparseint(genparams.get("clip_skip", -1),-1) clip_skip = tryparseint(genparams.get("clip_skip", -1),-1)
extra_image = strip_base64_prefix(genparams.get("extra_image", "")) extra_images_arr = genparams.get("extra_images", [])
extra_images_arr = ([] if not extra_images_arr else extra_images_arr)
extra_images_arr = extra_images_arr[:extra_images_max]
#clean vars #clean vars
cfg_scale = (1 if cfg_scale < 1 else (25 if cfg_scale > 25 else cfg_scale)) cfg_scale = (1 if cfg_scale < 1 else (25 if cfg_scale > 25 else cfg_scale))
@ -1728,7 +1732,11 @@ def sd_generate(genparams):
inputs.negative_prompt = negative_prompt.encode("UTF-8") inputs.negative_prompt = negative_prompt.encode("UTF-8")
inputs.init_images = init_images.encode("UTF-8") inputs.init_images = init_images.encode("UTF-8")
inputs.mask = "".encode("UTF-8") if not mask else mask.encode("UTF-8") inputs.mask = "".encode("UTF-8") if not mask else mask.encode("UTF-8")
inputs.extra_image = "".encode("UTF-8") if not extra_image else extra_image.encode("UTF-8") inputs.extra_images_len = len(extra_images_arr)
inputs.extra_images = (ctypes.c_char_p * inputs.extra_images_len)()
for n, estr in enumerate(extra_images_arr):
extra_image = strip_base64_prefix(estr)
inputs.extra_images[n] = extra_image.encode("UTF-8")
inputs.flip_mask = flip_mask inputs.flip_mask = flip_mask
inputs.cfg_scale = cfg_scale inputs.cfg_scale = cfg_scale
inputs.denoising_strength = denoising_strength inputs.denoising_strength = denoising_strength

View file

@ -116,7 +116,8 @@ static int sddebugmode = 0;
static std::string recent_data = ""; static std::string recent_data = "";
static uint8_t * input_image_buffer = NULL; static uint8_t * input_image_buffer = NULL;
static uint8_t * input_mask_buffer = NULL; static uint8_t * input_mask_buffer = NULL;
static uint8_t * input_extraimage_buffer = NULL; static std::vector<uint8_t *> input_extraimage_buffers;
const int max_extra_images = 4;
static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv; static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv;
static int cfg_tiled_vae_threshold = 0; static int cfg_tiled_vae_threshold = 0;
@ -288,8 +289,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
sd_ctx->sd->apply_lora_from_file(lorafilename,inputs.lora_multiplier); sd_ctx->sd->apply_lora_from_file(lorafilename,inputs.lora_multiplier);
} }
return true; input_extraimage_buffers.reserve(max_extra_images);
return true;
} }
std::string clean_input_prompt(const std::string& input) { std::string clean_input_prompt(const std::string& input) {
@ -434,7 +436,12 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
std::string cleannegprompt = clean_input_prompt(inputs.negative_prompt); std::string cleannegprompt = clean_input_prompt(inputs.negative_prompt);
std::string img2img_data = std::string(inputs.init_images); std::string img2img_data = std::string(inputs.init_images);
std::string img2img_mask = std::string(inputs.mask); std::string img2img_mask = std::string(inputs.mask);
std::string extra_image_data = std::string(inputs.extra_image); std::vector<std::string> extra_image_data;
for(int i=0;i<inputs.extra_images_len;++i)
{
extra_image_data.push_back(std::string(inputs.extra_images[i]));
}
std::string sampler = inputs.sample_method; std::string sampler = inputs.sample_method;
sd_params->prompt = cleanprompt; sd_params->prompt = cleanprompt;
@ -503,17 +510,20 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
//for img2img //for img2img
sd_image_t input_image = {0,0,0,nullptr}; sd_image_t input_image = {0,0,0,nullptr};
sd_image_t extraimage_reference = {0,0,0,nullptr}; std::vector<sd_image_t> extraimage_references;
extraimage_references.reserve(max_extra_images);
std::vector<uint8_t> image_buffer; std::vector<uint8_t> image_buffer;
std::vector<uint8_t> image_mask_buffer; std::vector<uint8_t> image_mask_buffer;
std::vector<uint8_t> extraimage_buffer; std::vector<std::vector<uint8_t>> extraimage_buffers;
extraimage_buffers.reserve(max_extra_images);
int nx, ny, nc; int nx, ny, nc;
int img2imgW = sd_params->width; //for img2img input int img2imgW = sd_params->width; //for img2img input
int img2imgH = sd_params->height; int img2imgH = sd_params->height;
int img2imgC = 3; // Assuming RGB image int img2imgC = 3; // Assuming RGB image
std::vector<uint8_t> resized_image_buf(img2imgW * img2imgH * img2imgC); std::vector<uint8_t> resized_image_buf(img2imgW * img2imgH * img2imgC);
std::vector<uint8_t> resized_mask_buf(img2imgW * img2imgH * img2imgC); std::vector<uint8_t> resized_mask_buf(img2imgW * img2imgH * img2imgC);
std::vector<uint8_t> resized_extraimage_buf(img2imgW * img2imgH * img2imgC); std::vector<std::vector<uint8_t>> resized_extraimage_bufs(max_extra_images, std::vector<uint8_t>(img2imgW * img2imgH * img2imgC));
std::string ts = get_timestamp_str(); std::string ts = get_timestamp_str();
if(!sd_is_quiet) if(!sd_is_quiet)
@ -558,29 +568,39 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
sd_params->sample_method = sample_method_t::EULER_A; sd_params->sample_method = sample_method_t::EULER_A;
} }
if(extra_image_data!="") if(extra_image_data.size()>0)
{ {
if(input_extraimage_buffer!=nullptr) //just in time free old buffer if(input_extraimage_buffers.size()>0) //just in time free old buffer
{ {
stbi_image_free(input_extraimage_buffer); for(int i=0;i<input_extraimage_buffers.size();++i)
input_extraimage_buffer = nullptr; {
stbi_image_free(input_extraimage_buffers[i]);
}
input_extraimage_buffers.clear();
} }
int nx2, ny2, nc2; extraimage_buffers.clear();
int desiredchannels = 3; extraimage_references.clear();
extraimage_buffer = kcpp_base64_decode(extra_image_data); for(int i=0;i<extra_image_data.size() && i<max_extra_images;++i)
input_extraimage_buffer = stbi_load_from_memory(extraimage_buffer.data(), extraimage_buffer.size(), &nx2, &ny2, &nc2, desiredchannels); {
// Resize the image int nx2, ny2, nc2;
int resok = stbir_resize_uint8(input_extraimage_buffer, nx2, ny2, 0, resized_extraimage_buf.data(), img2imgW, img2imgH, 0, desiredchannels); int desiredchannels = 3;
if (!resok) { extraimage_buffers.push_back(kcpp_base64_decode(extra_image_data[i]));
printf("\nKCPP SD: resize extra image failed!\n"); input_extraimage_buffers.push_back(stbi_load_from_memory(extraimage_buffers[i].data(), extraimage_buffers[i].size(), &nx2, &ny2, &nc2, desiredchannels));
output.data = ""; // Resize the image
output.status = 0; int resok = stbir_resize_uint8(input_extraimage_buffers[i], nx2, ny2, 0, resized_extraimage_bufs[i].data(), img2imgW, img2imgH, 0, desiredchannels);
return output; if (!resok) {
printf("\nKCPP SD: resize extra image failed!\n");
output.data = "";
output.status = 0;
return output;
}
sd_image_t extraimage_reference;
extraimage_reference.width = img2imgW;
extraimage_reference.height = img2imgH;
extraimage_reference.channel = desiredchannels;
extraimage_reference.data = resized_extraimage_bufs[i].data();
extraimage_references.push_back(extraimage_reference);
} }
extraimage_reference.width = img2imgW;
extraimage_reference.height = img2imgH;
extraimage_reference.channel = desiredchannels;
extraimage_reference.data = resized_extraimage_buf.data();
//ensure prompt has img keyword, otherwise append it //ensure prompt has img keyword, otherwise append it
if(photomaker_enabled) if(photomaker_enabled)
@ -595,9 +615,29 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
} }
std::vector<sd_image_t> kontext_imgs; std::vector<sd_image_t> kontext_imgs;
if(extra_image_data!="" && loadedsdver==SDVersion::VERSION_FLUX && !sd_loaded_chroma()) if(extra_image_data.size()>0 && loadedsdver==SDVersion::VERSION_FLUX && !sd_loaded_chroma())
{ {
kontext_imgs.push_back(extraimage_reference); for(int i=0;i<extra_image_data.size();++i)
{
kontext_imgs.push_back(extraimage_references[i]);
}
if(!sd_is_quiet && sddebugmode==1)
{
printf("\nFlux Kontext: Using %d reference images\n",kontext_imgs.size());
}
}
std::vector<sd_image_t*> photomaker_imgs;
if(photomaker_enabled && extra_image_data.size()>0)
{
for(int i=0;i<extra_image_data.size();++i)
{
photomaker_imgs.push_back(&extraimage_references[i]);
}
if(!sd_is_quiet && sddebugmode==1)
{
printf("\nPhotomaker: Using %d reference images\n",photomaker_imgs.size());
}
} }
if (sd_params->mode == TXT2IMG) { if (sd_params->mode == TXT2IMG) {
@ -644,7 +684,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
sd_params->slg_scale, sd_params->slg_scale,
sd_params->skip_layer_start, sd_params->skip_layer_start,
sd_params->skip_layer_end, sd_params->skip_layer_end,
(photomaker_enabled && extra_image_data!=""?(&extraimage_reference):nullptr)); photomaker_imgs);
} else { } else {
if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) { if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) {
@ -769,7 +809,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
sd_params->slg_scale, sd_params->slg_scale,
sd_params->skip_layer_start, sd_params->skip_layer_start,
sd_params->skip_layer_end, sd_params->skip_layer_end,
(photomaker_enabled && extra_image_data!=""?(&extraimage_reference):nullptr)); photomaker_imgs);
} }
if (results == NULL) { if (results == NULL) {

View file

@ -1422,7 +1422,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
float skip_layer_start = 0.01, float skip_layer_start = 0.01,
float skip_layer_end = 0.2, float skip_layer_end = 0.2,
ggml_tensor* masked_image = NULL, ggml_tensor* masked_image = NULL,
const sd_image_t* photomaker_reference = nullptr) { const std::vector<sd_image_t*> photomaker_references = std::vector<sd_image_t*>()) {
if (seed < 0) { if (seed < 0) {
// Generally, when using the provided command line, the seed is always >0. // Generally, when using the provided command line, the seed is always >0.
// However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library // However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@ -1465,7 +1465,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
ggml_tensor* init_img = NULL; ggml_tensor* init_img = NULL;
SDCondition id_cond; SDCondition id_cond;
std::vector<bool> class_tokens_mask; std::vector<bool> class_tokens_mask;
if (sd_ctx->sd->pmid_model && photomaker_reference!=nullptr) if (sd_ctx->sd->pmid_model && photomaker_references.size()>0)
{ {
sd_ctx->sd->stacked_id = true; //turn on photomaker if needed sd_ctx->sd->stacked_id = true; //turn on photomaker if needed
} }
@ -1512,26 +1512,29 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
} }
} }
// handle single photomaker image passed in by kcpp // handle multiple photomaker images passed in by kcpp
if (sd_ctx->sd->pmid_model && photomaker_reference!=nullptr) if (sd_ctx->sd->pmid_model && photomaker_references.size()>0)
{ {
int c = 0; for(int i=0;i<photomaker_references.size();++i)
int width, height; {
width = photomaker_reference->width; int c = 0;
height = photomaker_reference->height; int width, height;
c = photomaker_reference->channel; width = photomaker_references[i]->width;
uint8_t* input_image_buffer = photomaker_reference->data; height = photomaker_references[i]->height;
sd_image_t* input_image = NULL; c = photomaker_references[i]->channel;
input_image = new sd_image_t{(uint32_t)width, uint8_t* input_image_buffer = photomaker_references[i]->data;
(uint32_t)height, sd_image_t* input_image = NULL;
3, input_image = new sd_image_t{(uint32_t)width,
input_image_buffer}; (uint32_t)height,
input_image = preprocess_id_image(input_image); 3,
if (input_image == NULL) { input_image_buffer};
LOG_ERROR("\npreprocess input id image from kcpp photomaker failed\n"); input_image = preprocess_id_image(input_image);
} else { if (input_image == NULL) {
LOG_INFO("\nPhotoMaker loaded image from kcpp\n"); LOG_ERROR("\npreprocess input id image from kcpp photomaker failed\n");
input_id_images.push_back(input_image); } else {
LOG_INFO("\nPhotoMaker loaded image from kcpp\n");
input_id_images.push_back(input_image);
}
} }
} }
@ -1790,7 +1793,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
float slg_scale = 0, float slg_scale = 0,
float skip_layer_start = 0.01, float skip_layer_start = 0.01,
float skip_layer_end = 0.2, float skip_layer_end = 0.2,
const sd_image_t* photomaker_reference = nullptr) { const std::vector<sd_image_t*> photomaker_references = std::vector<sd_image_t*>()) {
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count); std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
LOG_DEBUG("txt2img %dx%d", width, height); LOG_DEBUG("txt2img %dx%d", width, height);
if (sd_ctx == NULL) { if (sd_ctx == NULL) {
@ -1887,7 +1890,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
skip_layer_start, skip_layer_start,
skip_layer_end, skip_layer_end,
nullptr, nullptr,
photomaker_reference); photomaker_references);
size_t t1 = ggml_time_ms(); size_t t1 = ggml_time_ms();
@ -1924,7 +1927,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
float slg_scale = 0, float slg_scale = 0,
float skip_layer_start = 0.01, float skip_layer_start = 0.01,
float skip_layer_end = 0.2, float skip_layer_end = 0.2,
const sd_image_t* photomaker_reference = nullptr) { const std::vector<sd_image_t*> photomaker_references = std::vector<sd_image_t*>()) {
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count); std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
LOG_DEBUG("img2img %dx%d", width, height); LOG_DEBUG("img2img %dx%d", width, height);
if (sd_ctx == NULL) { if (sd_ctx == NULL) {
@ -2089,7 +2092,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
skip_layer_start, skip_layer_start,
skip_layer_end, skip_layer_end,
masked_image, masked_image,
photomaker_reference); photomaker_references);
size_t t2 = ggml_time_ms(); size_t t2 = ggml_time_ms();

View file

@ -183,7 +183,7 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
float slg_scale, float slg_scale,
float skip_layer_start, float skip_layer_start,
float skip_layer_end, float skip_layer_end,
const sd_image_t* photomaker_reference); const std::vector<sd_image_t*> photomaker_references);
SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
sd_image_t init_image, sd_image_t init_image,
@ -213,7 +213,7 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
float slg_scale, float slg_scale,
float skip_layer_start, float skip_layer_start,
float skip_layer_end, float skip_layer_end,
const sd_image_t* photomaker_reference); const std::vector<sd_image_t*> photomaker_references);
SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
sd_image_t init_image, sd_image_t init_image,