mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-15 03:19:41 +00:00
Now accepts multiple reference images
This commit is contained in:
parent
2e14338455
commit
4ec0e0fd21
6 changed files with 128 additions and 76 deletions
3
expose.h
3
expose.h
|
@ -181,7 +181,8 @@ struct sd_generation_inputs
|
|||
const char * negative_prompt = nullptr;
|
||||
const char * init_images = "";
|
||||
const char * mask = "";
|
||||
const char * extra_image = "";
|
||||
const int extra_images_len = 0;
|
||||
const char ** extra_images = nullptr;
|
||||
const bool flip_mask = false;
|
||||
const float denoising_strength = 0.0f;
|
||||
const float cfg_scale = 0.0f;
|
||||
|
|
File diff suppressed because one or more lines are too long
14
koboldcpp.py
14
koboldcpp.py
|
@ -59,6 +59,7 @@ stop_token_max = 256
|
|||
ban_token_max = 768
|
||||
logit_bias_max = 512
|
||||
dry_seq_break_max = 128
|
||||
extra_images_max = 4
|
||||
|
||||
# global vars
|
||||
KcppVersion = "1.94.2"
|
||||
|
@ -291,7 +292,8 @@ class sd_generation_inputs(ctypes.Structure):
|
|||
("negative_prompt", ctypes.c_char_p),
|
||||
("init_images", ctypes.c_char_p),
|
||||
("mask", ctypes.c_char_p),
|
||||
("extra_image", ctypes.c_char_p),
|
||||
("extra_images_len", ctypes.c_int),
|
||||
("extra_images", ctypes.POINTER(ctypes.c_char_p)),
|
||||
("flip_mask", ctypes.c_bool),
|
||||
("denoising_strength", ctypes.c_float),
|
||||
("cfg_scale", ctypes.c_float),
|
||||
|
@ -1714,7 +1716,9 @@ def sd_generate(genparams):
|
|||
seed = random.randint(100000, 999999)
|
||||
sample_method = genparams.get("sampler_name", "k_euler_a")
|
||||
clip_skip = tryparseint(genparams.get("clip_skip", -1),-1)
|
||||
extra_image = strip_base64_prefix(genparams.get("extra_image", ""))
|
||||
extra_images_arr = genparams.get("extra_images", [])
|
||||
extra_images_arr = ([] if not extra_images_arr else extra_images_arr)
|
||||
extra_images_arr = extra_images_arr[:extra_images_max]
|
||||
|
||||
#clean vars
|
||||
cfg_scale = (1 if cfg_scale < 1 else (25 if cfg_scale > 25 else cfg_scale))
|
||||
|
@ -1728,7 +1732,11 @@ def sd_generate(genparams):
|
|||
inputs.negative_prompt = negative_prompt.encode("UTF-8")
|
||||
inputs.init_images = init_images.encode("UTF-8")
|
||||
inputs.mask = "".encode("UTF-8") if not mask else mask.encode("UTF-8")
|
||||
inputs.extra_image = "".encode("UTF-8") if not extra_image else extra_image.encode("UTF-8")
|
||||
inputs.extra_images_len = len(extra_images_arr)
|
||||
inputs.extra_images = (ctypes.c_char_p * inputs.extra_images_len)()
|
||||
for n, estr in enumerate(extra_images_arr):
|
||||
extra_image = strip_base64_prefix(estr)
|
||||
inputs.extra_images[n] = extra_image.encode("UTF-8")
|
||||
inputs.flip_mask = flip_mask
|
||||
inputs.cfg_scale = cfg_scale
|
||||
inputs.denoising_strength = denoising_strength
|
||||
|
|
|
@ -116,7 +116,8 @@ static int sddebugmode = 0;
|
|||
static std::string recent_data = "";
|
||||
static uint8_t * input_image_buffer = NULL;
|
||||
static uint8_t * input_mask_buffer = NULL;
|
||||
static uint8_t * input_extraimage_buffer = NULL;
|
||||
static std::vector<uint8_t *> input_extraimage_buffers;
|
||||
const int max_extra_images = 4;
|
||||
|
||||
static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv;
|
||||
static int cfg_tiled_vae_threshold = 0;
|
||||
|
@ -288,8 +289,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
sd_ctx->sd->apply_lora_from_file(lorafilename,inputs.lora_multiplier);
|
||||
}
|
||||
|
||||
return true;
|
||||
input_extraimage_buffers.reserve(max_extra_images);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string clean_input_prompt(const std::string& input) {
|
||||
|
@ -434,7 +436,12 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
std::string cleannegprompt = clean_input_prompt(inputs.negative_prompt);
|
||||
std::string img2img_data = std::string(inputs.init_images);
|
||||
std::string img2img_mask = std::string(inputs.mask);
|
||||
std::string extra_image_data = std::string(inputs.extra_image);
|
||||
std::vector<std::string> extra_image_data;
|
||||
for(int i=0;i<inputs.extra_images_len;++i)
|
||||
{
|
||||
extra_image_data.push_back(std::string(inputs.extra_images[i]));
|
||||
}
|
||||
|
||||
std::string sampler = inputs.sample_method;
|
||||
|
||||
sd_params->prompt = cleanprompt;
|
||||
|
@ -503,17 +510,20 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
|
||||
//for img2img
|
||||
sd_image_t input_image = {0,0,0,nullptr};
|
||||
sd_image_t extraimage_reference = {0,0,0,nullptr};
|
||||
std::vector<sd_image_t> extraimage_references;
|
||||
extraimage_references.reserve(max_extra_images);
|
||||
std::vector<uint8_t> image_buffer;
|
||||
std::vector<uint8_t> image_mask_buffer;
|
||||
std::vector<uint8_t> extraimage_buffer;
|
||||
std::vector<std::vector<uint8_t>> extraimage_buffers;
|
||||
extraimage_buffers.reserve(max_extra_images);
|
||||
|
||||
int nx, ny, nc;
|
||||
int img2imgW = sd_params->width; //for img2img input
|
||||
int img2imgH = sd_params->height;
|
||||
int img2imgC = 3; // Assuming RGB image
|
||||
std::vector<uint8_t> resized_image_buf(img2imgW * img2imgH * img2imgC);
|
||||
std::vector<uint8_t> resized_mask_buf(img2imgW * img2imgH * img2imgC);
|
||||
std::vector<uint8_t> resized_extraimage_buf(img2imgW * img2imgH * img2imgC);
|
||||
std::vector<std::vector<uint8_t>> resized_extraimage_bufs(max_extra_images, std::vector<uint8_t>(img2imgW * img2imgH * img2imgC));
|
||||
|
||||
std::string ts = get_timestamp_str();
|
||||
if(!sd_is_quiet)
|
||||
|
@ -558,29 +568,39 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
sd_params->sample_method = sample_method_t::EULER_A;
|
||||
}
|
||||
|
||||
if(extra_image_data!="")
|
||||
if(extra_image_data.size()>0)
|
||||
{
|
||||
if(input_extraimage_buffer!=nullptr) //just in time free old buffer
|
||||
if(input_extraimage_buffers.size()>0) //just in time free old buffer
|
||||
{
|
||||
stbi_image_free(input_extraimage_buffer);
|
||||
input_extraimage_buffer = nullptr;
|
||||
for(int i=0;i<input_extraimage_buffers.size();++i)
|
||||
{
|
||||
stbi_image_free(input_extraimage_buffers[i]);
|
||||
}
|
||||
input_extraimage_buffers.clear();
|
||||
}
|
||||
extraimage_buffers.clear();
|
||||
extraimage_references.clear();
|
||||
for(int i=0;i<extra_image_data.size() && i<max_extra_images;++i)
|
||||
{
|
||||
int nx2, ny2, nc2;
|
||||
int desiredchannels = 3;
|
||||
extraimage_buffer = kcpp_base64_decode(extra_image_data);
|
||||
input_extraimage_buffer = stbi_load_from_memory(extraimage_buffer.data(), extraimage_buffer.size(), &nx2, &ny2, &nc2, desiredchannels);
|
||||
extraimage_buffers.push_back(kcpp_base64_decode(extra_image_data[i]));
|
||||
input_extraimage_buffers.push_back(stbi_load_from_memory(extraimage_buffers[i].data(), extraimage_buffers[i].size(), &nx2, &ny2, &nc2, desiredchannels));
|
||||
// Resize the image
|
||||
int resok = stbir_resize_uint8(input_extraimage_buffer, nx2, ny2, 0, resized_extraimage_buf.data(), img2imgW, img2imgH, 0, desiredchannels);
|
||||
int resok = stbir_resize_uint8(input_extraimage_buffers[i], nx2, ny2, 0, resized_extraimage_bufs[i].data(), img2imgW, img2imgH, 0, desiredchannels);
|
||||
if (!resok) {
|
||||
printf("\nKCPP SD: resize extra image failed!\n");
|
||||
output.data = "";
|
||||
output.status = 0;
|
||||
return output;
|
||||
}
|
||||
sd_image_t extraimage_reference;
|
||||
extraimage_reference.width = img2imgW;
|
||||
extraimage_reference.height = img2imgH;
|
||||
extraimage_reference.channel = desiredchannels;
|
||||
extraimage_reference.data = resized_extraimage_buf.data();
|
||||
extraimage_reference.data = resized_extraimage_bufs[i].data();
|
||||
extraimage_references.push_back(extraimage_reference);
|
||||
}
|
||||
|
||||
//ensure prompt has img keyword, otherwise append it
|
||||
if(photomaker_enabled)
|
||||
|
@ -595,9 +615,29 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
}
|
||||
|
||||
std::vector<sd_image_t> kontext_imgs;
|
||||
if(extra_image_data!="" && loadedsdver==SDVersion::VERSION_FLUX && !sd_loaded_chroma())
|
||||
if(extra_image_data.size()>0 && loadedsdver==SDVersion::VERSION_FLUX && !sd_loaded_chroma())
|
||||
{
|
||||
kontext_imgs.push_back(extraimage_reference);
|
||||
for(int i=0;i<extra_image_data.size();++i)
|
||||
{
|
||||
kontext_imgs.push_back(extraimage_references[i]);
|
||||
}
|
||||
if(!sd_is_quiet && sddebugmode==1)
|
||||
{
|
||||
printf("\nFlux Kontext: Using %d reference images\n",kontext_imgs.size());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<sd_image_t*> photomaker_imgs;
|
||||
if(photomaker_enabled && extra_image_data.size()>0)
|
||||
{
|
||||
for(int i=0;i<extra_image_data.size();++i)
|
||||
{
|
||||
photomaker_imgs.push_back(&extraimage_references[i]);
|
||||
}
|
||||
if(!sd_is_quiet && sddebugmode==1)
|
||||
{
|
||||
printf("\nPhotomaker: Using %d reference images\n",photomaker_imgs.size());
|
||||
}
|
||||
}
|
||||
|
||||
if (sd_params->mode == TXT2IMG) {
|
||||
|
@ -644,7 +684,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
sd_params->slg_scale,
|
||||
sd_params->skip_layer_start,
|
||||
sd_params->skip_layer_end,
|
||||
(photomaker_enabled && extra_image_data!=""?(&extraimage_reference):nullptr));
|
||||
photomaker_imgs);
|
||||
} else {
|
||||
|
||||
if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) {
|
||||
|
@ -769,7 +809,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
sd_params->slg_scale,
|
||||
sd_params->skip_layer_start,
|
||||
sd_params->skip_layer_end,
|
||||
(photomaker_enabled && extra_image_data!=""?(&extraimage_reference):nullptr));
|
||||
photomaker_imgs);
|
||||
}
|
||||
|
||||
if (results == NULL) {
|
||||
|
|
|
@ -1422,7 +1422,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
|
|||
float skip_layer_start = 0.01,
|
||||
float skip_layer_end = 0.2,
|
||||
ggml_tensor* masked_image = NULL,
|
||||
const sd_image_t* photomaker_reference = nullptr) {
|
||||
const std::vector<sd_image_t*> photomaker_references = std::vector<sd_image_t*>()) {
|
||||
if (seed < 0) {
|
||||
// Generally, when using the provided command line, the seed is always >0.
|
||||
// However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
|
||||
|
@ -1465,7 +1465,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
|
|||
ggml_tensor* init_img = NULL;
|
||||
SDCondition id_cond;
|
||||
std::vector<bool> class_tokens_mask;
|
||||
if (sd_ctx->sd->pmid_model && photomaker_reference!=nullptr)
|
||||
if (sd_ctx->sd->pmid_model && photomaker_references.size()>0)
|
||||
{
|
||||
sd_ctx->sd->stacked_id = true; //turn on photomaker if needed
|
||||
}
|
||||
|
@ -1512,15 +1512,17 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
|
|||
}
|
||||
}
|
||||
|
||||
// handle single photomaker image passed in by kcpp
|
||||
if (sd_ctx->sd->pmid_model && photomaker_reference!=nullptr)
|
||||
// handle multiple photomaker images passed in by kcpp
|
||||
if (sd_ctx->sd->pmid_model && photomaker_references.size()>0)
|
||||
{
|
||||
for(int i=0;i<photomaker_references.size();++i)
|
||||
{
|
||||
int c = 0;
|
||||
int width, height;
|
||||
width = photomaker_reference->width;
|
||||
height = photomaker_reference->height;
|
||||
c = photomaker_reference->channel;
|
||||
uint8_t* input_image_buffer = photomaker_reference->data;
|
||||
width = photomaker_references[i]->width;
|
||||
height = photomaker_references[i]->height;
|
||||
c = photomaker_references[i]->channel;
|
||||
uint8_t* input_image_buffer = photomaker_references[i]->data;
|
||||
sd_image_t* input_image = NULL;
|
||||
input_image = new sd_image_t{(uint32_t)width,
|
||||
(uint32_t)height,
|
||||
|
@ -1534,6 +1536,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
|
|||
input_id_images.push_back(input_image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (input_id_images.size() > 0) {
|
||||
sd_ctx->sd->pmid_model->style_strength = style_ratio;
|
||||
|
@ -1790,7 +1793,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
|
|||
float slg_scale = 0,
|
||||
float skip_layer_start = 0.01,
|
||||
float skip_layer_end = 0.2,
|
||||
const sd_image_t* photomaker_reference = nullptr) {
|
||||
const std::vector<sd_image_t*> photomaker_references = std::vector<sd_image_t*>()) {
|
||||
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
|
||||
LOG_DEBUG("txt2img %dx%d", width, height);
|
||||
if (sd_ctx == NULL) {
|
||||
|
@ -1887,7 +1890,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
|
|||
skip_layer_start,
|
||||
skip_layer_end,
|
||||
nullptr,
|
||||
photomaker_reference);
|
||||
photomaker_references);
|
||||
|
||||
size_t t1 = ggml_time_ms();
|
||||
|
||||
|
@ -1924,7 +1927,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
|
|||
float slg_scale = 0,
|
||||
float skip_layer_start = 0.01,
|
||||
float skip_layer_end = 0.2,
|
||||
const sd_image_t* photomaker_reference = nullptr) {
|
||||
const std::vector<sd_image_t*> photomaker_references = std::vector<sd_image_t*>()) {
|
||||
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
|
||||
LOG_DEBUG("img2img %dx%d", width, height);
|
||||
if (sd_ctx == NULL) {
|
||||
|
@ -2089,7 +2092,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
|
|||
skip_layer_start,
|
||||
skip_layer_end,
|
||||
masked_image,
|
||||
photomaker_reference);
|
||||
photomaker_references);
|
||||
|
||||
size_t t2 = ggml_time_ms();
|
||||
|
||||
|
|
|
@ -183,7 +183,7 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
|
|||
float slg_scale,
|
||||
float skip_layer_start,
|
||||
float skip_layer_end,
|
||||
const sd_image_t* photomaker_reference);
|
||||
const std::vector<sd_image_t*> photomaker_references);
|
||||
|
||||
SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
|
||||
sd_image_t init_image,
|
||||
|
@ -213,7 +213,7 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
|
|||
float slg_scale,
|
||||
float skip_layer_start,
|
||||
float skip_layer_end,
|
||||
const sd_image_t* photomaker_reference);
|
||||
const std::vector<sd_image_t*> photomaker_references);
|
||||
|
||||
SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
|
||||
sd_image_t init_image,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue