mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-07 00:41:50 +00:00
compact UI launcher, WIP handling video outputs
This commit is contained in:
parent
cd6c771bd9
commit
553890f975
4 changed files with 220 additions and 15 deletions
1
expose.h
1
expose.h
|
|
@ -200,6 +200,7 @@ struct sd_generation_inputs
|
|||
const int seed = 0;
|
||||
const char * sample_method = nullptr;
|
||||
const int clip_skip = -1;
|
||||
const int vid_req_frames = 1;
|
||||
};
|
||||
struct sd_generation_outputs
|
||||
{
|
||||
|
|
|
|||
16
koboldcpp.py
16
koboldcpp.py
|
|
@ -313,7 +313,8 @@ class sd_generation_inputs(ctypes.Structure):
|
|||
("height", ctypes.c_int),
|
||||
("seed", ctypes.c_int),
|
||||
("sample_method", ctypes.c_char_p),
|
||||
("clip_skip", ctypes.c_int)]
|
||||
("clip_skip", ctypes.c_int),
|
||||
("vid_req_frames", ctypes.c_int)]
|
||||
|
||||
class sd_generation_outputs(ctypes.Structure):
|
||||
_fields_ = [("status", ctypes.c_int),
|
||||
|
|
@ -1815,6 +1816,8 @@ def sd_generate(genparams):
|
|||
seed = random.randint(100000, 999999)
|
||||
sample_method = genparams.get("sampler_name", "k_euler_a")
|
||||
clip_skip = tryparseint(genparams.get("clip_skip", -1),-1)
|
||||
vid_req_frames = tryparseint(genparams.get("frames", 1),1)
|
||||
vid_req_frames = 1 if (not vid_req_frames or vid_req_frames < 1) else vid_req_frames
|
||||
extra_images_arr = genparams.get("extra_images", [])
|
||||
extra_images_arr = ([] if not extra_images_arr else extra_images_arr)
|
||||
extra_images_arr = [img for img in extra_images_arr if img not in (None, "")]
|
||||
|
|
@ -1846,6 +1849,7 @@ def sd_generate(genparams):
|
|||
inputs.seed = seed
|
||||
inputs.sample_method = sample_method.lower().encode("UTF-8")
|
||||
inputs.clip_skip = clip_skip
|
||||
inputs.vid_req_frames = vid_req_frames
|
||||
ret = handle.sd_generate(inputs)
|
||||
outstr = ""
|
||||
if ret.status==1:
|
||||
|
|
@ -5397,13 +5401,13 @@ def show_gui():
|
|||
makefileentry(images_tab, "Image Gen. Model (safetensors/gguf):", "Select Image Gen Model File", sd_model_var, 1, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")], tooltiptxt="Select a .safetensors or .gguf Image Generation model file on disk to be loaded.")
|
||||
makelabelentry(images_tab, "Clamp Resolution Limit (Hard):", sd_clamped_var, 4, 50, padx=190,singleline=True,tooltip="Limit generation steps and output image size for shared use.\nSet to 0 to disable, otherwise value is clamped to the max size limit (min 512px).")
|
||||
makelabelentry(images_tab, "(Soft):", sd_clamped_soft_var, 4, 50, padx=290,singleline=True,tooltip="Square image size restriction, to protect the server against memory crashes.\nAllows width-height tradeoffs, eg. 640 allows 640x640 and 512x768\nLeave at 0 for the default value: 832 for SD1.5/SD2, 1024 otherwise.",labelpadx=250)
|
||||
makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.")
|
||||
makelabelentry(images_tab, "ImgThreads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.",labelpadx=210)
|
||||
sd_model_var.trace_add("write", gui_changed_modelfile)
|
||||
makelabelcombobox(images_tab, "Compress Weights (Saves Memory): ", sd_quant_var, 10, width=60, padx=220, labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices)
|
||||
makelabelcombobox(images_tab, "Compress Weights: ", sd_quant_var, 8, width=60, padx=126, labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices)
|
||||
sd_quant_var.trace_add("write", changed_gpulayers_estimate)
|
||||
|
||||
makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 20, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
|
||||
makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 22, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.")
|
||||
makefileentry(images_tab, "Image LoRA:", "Select SD lora file",sd_lora_var, 20, width=160, singlerow=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
|
||||
makelabelentry(images_tab, "Multiplier:" , sd_loramult_var, 20, 50,padx=390,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.",labelpadx=330)
|
||||
|
||||
makefileentry(images_tab, "T5-XXL File:", "Select Optional T5-XXL model file (SD3 or flux)",sd_t5xxl_var, 24, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
|
||||
makefileentry(images_tab, "Clip-L File:", "Select Optional Clip-L model file (SD3 or flux)",sd_clipl_var, 26, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
|
||||
|
|
@ -5424,7 +5428,7 @@ def show_gui():
|
|||
makecheckbox(images_tab, "TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 42,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.")
|
||||
makelabelcombobox(images_tab, "Conv2D Direct:", sd_convdirect_var, row=42, labelpadx=220, padx=310, width=90, tooltiptxt="Use Conv2D Direct operation. May save memory or improve performance.\nMight crash if not supported by the backend.\n", values=sd_convdirect_choices)
|
||||
makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=144,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.")
|
||||
makecheckbox(images_tab, "SD Flash Attention", sd_flash_attention_var, 46, tooltiptxt="Enable Flash Attention for image diffusion. May save memory or improve performance.")
|
||||
makecheckbox(images_tab, "SD Flash Attention", sd_flash_attention_var, 44,padx=230, tooltiptxt="Enable Flash Attention for image diffusion. May save memory or improve performance.")
|
||||
|
||||
# audio tab
|
||||
audio_tab = tabcontent["Audio"]
|
||||
|
|
|
|||
|
|
@ -214,4 +214,180 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//// KCPP PART FOR WRITING AVI TO MEMORY
|
||||
|
||||
// Growable byte buffer used to assemble the AVI container in memory.
// `data` is owned malloc-family memory (grown with realloc, released with
// free); `size` is the number of valid bytes currently stored in `data`.
typedef struct {
    uint8_t* data;
    size_t size;
} mem_buffer_t;

// Append `size` raw bytes from `data` to the end of `buf`.
// Returns true on success. On failure (null source, size overflow, or out of
// memory) an error is printed to stderr, the buffer is left exactly as it was
// and false is returned. Callers that ignore the result keep compiling as before.
static bool mem_write(mem_buffer_t* buf, const void* data, size_t size) {
    if (size == 0) {
        // Nothing to append; also avoids realloc(ptr, 0) and memcpy on a null pointer.
        return true;
    }
    if (data == NULL || size > (size_t)-1 - buf->size) {
        fprintf(stderr, "Error: Invalid write to AVI memory buffer.\n");
        return false;
    }

    // Assign through a temporary: realloc keeps the old block alive when it
    // fails, so overwriting buf->data directly would leak it and then crash.
    uint8_t* grown = (uint8_t*)realloc(buf->data, buf->size + size);
    if (grown == NULL) {
        fprintf(stderr, "Error: Out of memory while growing AVI memory buffer.\n");
        return false;
    }

    memcpy(grown + buf->size, data, size);
    buf->data = grown;
    buf->size += size;
    return true;
}

// Append a 32-bit value in little-endian byte order, independent of host endianness.
static bool mem_write_u32_le(mem_buffer_t* buf, uint32_t val) {
    const uint8_t bytes[4] = {
        (uint8_t)(val & 0xFFu),
        (uint8_t)((val >> 8) & 0xFFu),
        (uint8_t)((val >> 16) & 0xFFu),
        (uint8_t)((val >> 24) & 0xFFu),
    };
    return mem_write(buf, bytes, sizeof(bytes));
}

// Append a 16-bit value in little-endian byte order, independent of host endianness.
static bool mem_write_u16_le(mem_buffer_t* buf, uint16_t val) {
    const uint8_t bytes[2] = {
        (uint8_t)(val & 0xFFu),
        (uint8_t)((val >> 8) & 0xFFu),
    };
    return mem_write(buf, bytes, sizeof(bytes));
}
|
||||
|
||||
/**
|
||||
* Create MJPG AVI file in memory and return as base64 string.
|
||||
* Returns 0 on success, -1 on failure
|
||||
* must be freed by caller after use
|
||||
*/
|
||||
int create_mjpg_avi_membuf_from_sd_images(sd_image_t* images, int num_images, int fps, int quality, uint8_t** out_data, size_t *out_len)
|
||||
{
|
||||
if (num_images == 0) {
|
||||
fprintf(stderr, "Error: Image array is empty.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
mem_buffer_t buf = {NULL, 0};
|
||||
uint32_t width = images[0].width;
|
||||
uint32_t height = images[0].height;
|
||||
uint32_t channels = images[0].channel;
|
||||
|
||||
if (channels != 3 && channels != 4) {
|
||||
fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// --- RIFF AVI Header ---
|
||||
mem_write(&buf, "RIFF", 4);
|
||||
size_t riff_size_pos = buf.size;
|
||||
mem_write_u32_le(&buf, 0); // placeholder
|
||||
mem_write(&buf, "AVI ", 4);
|
||||
|
||||
// 'hdrl' LIST
|
||||
mem_write(&buf, "LIST", 4);
|
||||
mem_write_u32_le(&buf, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
|
||||
mem_write(&buf, "hdrl", 4);
|
||||
|
||||
// 'avih'
|
||||
mem_write(&buf, "avih", 4);
|
||||
mem_write_u32_le(&buf, 56);
|
||||
mem_write_u32_le(&buf, 1000000 / fps);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 0x110);
|
||||
mem_write_u32_le(&buf, num_images);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 1);
|
||||
mem_write_u32_le(&buf, width * height * 3);
|
||||
mem_write_u32_le(&buf, width);
|
||||
mem_write_u32_le(&buf, height);
|
||||
mem_write_u32_le(&buf, 0); mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 0); mem_write_u32_le(&buf, 0);
|
||||
|
||||
// 'strl' LIST
|
||||
mem_write(&buf, "LIST", 4);
|
||||
mem_write_u32_le(&buf, 4 + 8 + 56 + 8 + 40);
|
||||
mem_write(&buf, "strl", 4);
|
||||
|
||||
// 'strh'
|
||||
mem_write(&buf, "strh", 4);
|
||||
mem_write_u32_le(&buf, 56);
|
||||
mem_write(&buf, "vids", 4);
|
||||
mem_write(&buf, "MJPG", 4);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u16_le(&buf, 0);
|
||||
mem_write_u16_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 1);
|
||||
mem_write_u32_le(&buf, fps);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, num_images);
|
||||
mem_write_u32_le(&buf, width * height * 3);
|
||||
mem_write_u32_le(&buf, (uint32_t)-1);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u16_le(&buf, 0); mem_write_u16_le(&buf, 0);
|
||||
mem_write_u16_le(&buf, 0); mem_write_u16_le(&buf, 0);
|
||||
|
||||
// 'strf'
|
||||
mem_write(&buf, "strf", 4);
|
||||
mem_write_u32_le(&buf, 40);
|
||||
mem_write_u32_le(&buf, 40);
|
||||
mem_write_u32_le(&buf, width);
|
||||
mem_write_u32_le(&buf, height);
|
||||
mem_write_u16_le(&buf, 1);
|
||||
mem_write_u16_le(&buf, 24);
|
||||
mem_write(&buf, "MJPG", 4);
|
||||
mem_write_u32_le(&buf, width * height * 3);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write_u32_le(&buf, 0);
|
||||
|
||||
// 'movi' LIST
|
||||
mem_write(&buf, "LIST", 4);
|
||||
size_t movi_size_pos = buf.size;
|
||||
mem_write_u32_le(&buf, 0);
|
||||
mem_write(&buf, "movi", 4);
|
||||
|
||||
avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images);
|
||||
|
||||
// Encode and write each frame
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
struct {
|
||||
uint8_t* buf;
|
||||
size_t size;
|
||||
} jpeg_data = {NULL, 0};
|
||||
|
||||
auto write_to_buf = [](void* context, void* data, int size) {
|
||||
auto jd = (decltype(jpeg_data)*)context;
|
||||
jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size);
|
||||
memcpy(jd->buf + jd->size, data, size);
|
||||
jd->size += size;
|
||||
};
|
||||
|
||||
stbi_write_jpg_to_func(
|
||||
write_to_buf, &jpeg_data,
|
||||
images[i].width, images[i].height,
|
||||
channels, images[i].data, quality
|
||||
);
|
||||
|
||||
mem_write(&buf, "00dc", 4);
|
||||
mem_write_u32_le(&buf, jpeg_data.size);
|
||||
index[i].offset = buf.size - 8;
|
||||
index[i].size = jpeg_data.size;
|
||||
mem_write(&buf, jpeg_data.buf, jpeg_data.size);
|
||||
if (jpeg_data.size % 2) mem_write(&buf, "\0", 1);
|
||||
|
||||
free(jpeg_data.buf);
|
||||
}
|
||||
|
||||
// finalize movi size
|
||||
uint32_t movi_size = buf.size - movi_size_pos - 4;
|
||||
memcpy(buf.data + movi_size_pos, &movi_size, 4);
|
||||
|
||||
// write idx1
|
||||
mem_write(&buf, "idx1", 4);
|
||||
mem_write_u32_le(&buf, num_images * 16);
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
mem_write(&buf, "00dc", 4);
|
||||
mem_write_u32_le(&buf, 0x10);
|
||||
mem_write_u32_le(&buf, index[i].offset);
|
||||
mem_write_u32_le(&buf, index[i].size);
|
||||
}
|
||||
|
||||
// finalize RIFF size
|
||||
uint32_t riff_size = buf.size - riff_size_pos - 4;
|
||||
memcpy(buf.data + riff_size_pos, &riff_size, 4);
|
||||
|
||||
free(index);
|
||||
|
||||
*out_data = buf.data;
|
||||
*out_len = buf.size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // __AVI_WRITER_H__
|
||||
|
|
@ -37,6 +37,8 @@
|
|||
// #define STB_IMAGE_RESIZE_IMPLEMENTATION //already defined in llava
|
||||
#include "stb_image_resize.h"
|
||||
|
||||
#include "avi_writer.h"
|
||||
|
||||
static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT,
|
||||
"inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT");
|
||||
|
||||
|
|
@ -721,9 +723,12 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
params.pm_params.id_images = photomaker_imgs.data();
|
||||
params.pm_params.id_images_count = photomaker_imgs.size();
|
||||
|
||||
//the below params are only used in video models. May move into standalone object in future
|
||||
int vid_req_frames = inputs.vid_req_frames;
|
||||
int generated_num_results = 1;
|
||||
|
||||
if(is_vid_model)
|
||||
{
|
||||
int num_results = 1;
|
||||
std::vector<sd_image_t> control_frames; //empty for now
|
||||
sd_vid_gen_params_t vid_gen_params = {};
|
||||
sd_vid_gen_params_init (&vid_gen_params);
|
||||
|
|
@ -737,8 +742,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
vid_gen_params.sample_params = params.sample_params;
|
||||
vid_gen_params.strength = params.strength;
|
||||
vid_gen_params.seed = params.seed;
|
||||
vid_gen_params.video_frames = 1;
|
||||
if(!sd_is_quiet && sddebugmode==1)
|
||||
vid_gen_params.video_frames = vid_req_frames;
|
||||
if(!sd_is_quiet && sddebugmode==1)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "\nVID PROMPT:" << vid_gen_params.prompt
|
||||
|
|
@ -755,7 +760,11 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
}
|
||||
|
||||
fflush(stdout);
|
||||
results = generate_video(sd_ctx, &vid_gen_params, &num_results);
|
||||
results = generate_video(sd_ctx, &vid_gen_params, &generated_num_results);
|
||||
if(!sd_is_quiet && sddebugmode==1)
|
||||
{
|
||||
printf("\nRequested Vid Frames: %d, Generated Vid Frames: %d\n",vid_req_frames, generated_num_results);
|
||||
}
|
||||
}
|
||||
else if (!is_img2img)
|
||||
{
|
||||
|
|
@ -906,12 +915,27 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
continue;
|
||||
}
|
||||
|
||||
int out_data_len;
|
||||
unsigned char * png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params).c_str());
|
||||
if (png != NULL)
|
||||
//if multiframe, make a video
|
||||
if(vid_req_frames>1 && generated_num_results>1 && is_vid_model)
|
||||
{
|
||||
recent_data = kcpp_base64_encode(png,out_data_len);
|
||||
free(png);
|
||||
uint8_t * out_data = nullptr;
|
||||
size_t out_len = 0;
|
||||
int status = create_mjpg_avi_membuf_from_sd_images(results, generated_num_results, 24, 40, &out_data,&out_len);
|
||||
if(status==0)
|
||||
{
|
||||
recent_data = kcpp_base64_encode(out_data, out_len);
|
||||
free(out_data);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int out_data_len;
|
||||
unsigned char * png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params).c_str());
|
||||
if (png != NULL)
|
||||
{
|
||||
recent_data = kcpp_base64_encode(png,out_data_len);
|
||||
free(png);
|
||||
}
|
||||
}
|
||||
|
||||
free(results[i].data);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue