compact UI launcher, WIP handling video outputs

This commit is contained in:
Concedo 2025-09-28 17:24:04 +08:00
parent cd6c771bd9
commit 553890f975
4 changed files with 220 additions and 15 deletions

View file

@ -200,6 +200,7 @@ struct sd_generation_inputs
const int seed = 0;
const char * sample_method = nullptr;
const int clip_skip = -1;
const int vid_req_frames = 1;
};
struct sd_generation_outputs
{

View file

@ -313,7 +313,8 @@ class sd_generation_inputs(ctypes.Structure):
("height", ctypes.c_int),
("seed", ctypes.c_int),
("sample_method", ctypes.c_char_p),
("clip_skip", ctypes.c_int)]
("clip_skip", ctypes.c_int),
("vid_req_frames", ctypes.c_int)]
class sd_generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int),
@ -1815,6 +1816,8 @@ def sd_generate(genparams):
seed = random.randint(100000, 999999)
sample_method = genparams.get("sampler_name", "k_euler_a")
clip_skip = tryparseint(genparams.get("clip_skip", -1),-1)
vid_req_frames = tryparseint(genparams.get("frames", 1),1)
vid_req_frames = 1 if (not vid_req_frames or vid_req_frames < 1) else vid_req_frames
extra_images_arr = genparams.get("extra_images", [])
extra_images_arr = ([] if not extra_images_arr else extra_images_arr)
extra_images_arr = [img for img in extra_images_arr if img not in (None, "")]
@ -1846,6 +1849,7 @@ def sd_generate(genparams):
inputs.seed = seed
inputs.sample_method = sample_method.lower().encode("UTF-8")
inputs.clip_skip = clip_skip
inputs.vid_req_frames = vid_req_frames
ret = handle.sd_generate(inputs)
outstr = ""
if ret.status==1:
@ -5397,13 +5401,13 @@ def show_gui():
makefileentry(images_tab, "Image Gen. Model (safetensors/gguf):", "Select Image Gen Model File", sd_model_var, 1, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")], tooltiptxt="Select a .safetensors or .gguf Image Generation model file on disk to be loaded.")
makelabelentry(images_tab, "Clamp Resolution Limit (Hard):", sd_clamped_var, 4, 50, padx=190,singleline=True,tooltip="Limit generation steps and output image size for shared use.\nSet to 0 to disable, otherwise value is clamped to the max size limit (min 512px).")
makelabelentry(images_tab, "(Soft):", sd_clamped_soft_var, 4, 50, padx=290,singleline=True,tooltip="Square image size restriction, to protect the server against memory crashes.\nAllows width-height tradeoffs, eg. 640 allows 640x640 and 512x768\nLeave at 0 for the default value: 832 for SD1.5/SD2, 1024 otherwise.",labelpadx=250)
makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.")
makelabelentry(images_tab, "ImgThreads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.",labelpadx=210)
sd_model_var.trace_add("write", gui_changed_modelfile)
makelabelcombobox(images_tab, "Compress Weights (Saves Memory): ", sd_quant_var, 10, width=60, padx=220, labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices)
makelabelcombobox(images_tab, "Compress Weights: ", sd_quant_var, 8, width=60, padx=126, labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices)
sd_quant_var.trace_add("write", changed_gpulayers_estimate)
makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 20, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 22, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.")
makefileentry(images_tab, "Image LoRA:", "Select SD lora file",sd_lora_var, 20, width=160, singlerow=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
makelabelentry(images_tab, "Multiplier:" , sd_loramult_var, 20, 50,padx=390,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.",labelpadx=330)
makefileentry(images_tab, "T5-XXL File:", "Select Optional T5-XXL model file (SD3 or flux)",sd_t5xxl_var, 24, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
makefileentry(images_tab, "Clip-L File:", "Select Optional Clip-L model file (SD3 or flux)",sd_clipl_var, 26, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
@ -5424,7 +5428,7 @@ def show_gui():
makecheckbox(images_tab, "TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 42,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.")
makelabelcombobox(images_tab, "Conv2D Direct:", sd_convdirect_var, row=42, labelpadx=220, padx=310, width=90, tooltiptxt="Use Conv2D Direct operation. May save memory or improve performance.\nMight crash if not supported by the backend.\n", values=sd_convdirect_choices)
makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=144,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.")
makecheckbox(images_tab, "SD Flash Attention", sd_flash_attention_var, 46, tooltiptxt="Enable Flash Attention for image diffusion. May save memory or improve performance.")
makecheckbox(images_tab, "SD Flash Attention", sd_flash_attention_var, 44,padx=230, tooltiptxt="Enable Flash Attention for image diffusion. May save memory or improve performance.")
# audio tab
audio_tab = tabcontent["Audio"]

View file

@ -214,4 +214,180 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int
return 0;
}
//// KCPP PART FOR WRITING AVI TO MEMORY
// Growable heap buffer used to assemble a file image in memory.
// `data` is (re)allocated with realloc() by mem_write(); whoever ends up
// holding the pointer releases it with free().
typedef struct {
    uint8_t* data;  // start of the allocation, NULL while nothing has been written
    size_t size;    // number of valid bytes stored at `data`
} mem_buffer_t;

// Append `size` raw bytes from `data` to the end of `buf`.
// Returns true on success. On failure (NULL source, size overflow, or the
// allocation could not be grown) the buffer is left exactly as it was and
// false is returned, so the bytes already written stay valid and owned by `buf`.
static bool mem_write(mem_buffer_t* buf, const void* data, size_t size) {
    if (size == 0) {
        return true;  // nothing to append; also keeps a NULL source away from memcpy
    }
    const size_t new_size = buf->size + size;
    if (data == NULL || new_size < buf->size) {
        return false;  // second test catches size_t wrap-around
    }
    // Keep the old pointer until realloc succeeds: assigning the result
    // directly would leak the existing block when realloc returns NULL.
    void* grown = realloc(buf->data, new_size);
    if (grown == NULL) {
        return false;
    }
    buf->data = static_cast<uint8_t*>(grown);
    memcpy(buf->data + buf->size, data, size);
    buf->size = new_size;
    return true;
}

// Append a 32-bit value in little-endian byte order, regardless of host endianness.
// Returns the result of the underlying mem_write().
static bool mem_write_u32_le(mem_buffer_t* buf, uint32_t val) {
    const uint8_t bytes[4] = {
        static_cast<uint8_t>(val & 0xFFu),
        static_cast<uint8_t>((val >> 8) & 0xFFu),
        static_cast<uint8_t>((val >> 16) & 0xFFu),
        static_cast<uint8_t>((val >> 24) & 0xFFu),
    };
    return mem_write(buf, bytes, sizeof(bytes));
}

// Append a 16-bit value in little-endian byte order, regardless of host endianness.
// Returns the result of the underlying mem_write().
static bool mem_write_u16_le(mem_buffer_t* buf, uint16_t val) {
    const uint8_t bytes[2] = {
        static_cast<uint8_t>(val & 0xFFu),
        static_cast<uint8_t>((val >> 8) & 0xFFu),
    };
    return mem_write(buf, bytes, sizeof(bytes));
}
/**
 * Build an MJPG-in-AVI file entirely in memory from a sequence of frames.
 *
 * Every frame is JPEG-encoded with stbi_write_jpg_to_func() and stored as a
 * '00dc' chunk inside the 'movi' list, followed by an 'idx1' index.
 *
 * @param images     Array of `num_images` frames. Frame 0 supplies the width,
 *                   height and channel count (3 or 4) written to the headers.
 * @param num_images Number of frames; must be greater than 0.
 * @param fps        Playback rate in frames per second; must be greater than 0.
 * @param quality    JPEG quality, forwarded unchanged to stbi_write_jpg_to_func().
 * @param out_data   Receives the raw AVI bytes (NOT base64 encoded). The memory
 *                   is heap allocated and must be released by the caller with free().
 * @param out_len    Receives the number of bytes stored at *out_data.
 * @return 0 on success, -1 on failure. On failure nothing is allocated for the
 *         caller and *out_data / *out_len are set to NULL / 0.
 */
int create_mjpg_avi_membuf_from_sd_images(sd_image_t* images, int num_images, int fps, int quality, uint8_t** out_data, size_t *out_len)
{
    if (out_data == NULL || out_len == NULL) {
        fprintf(stderr, "Error: Output pointers must not be NULL.\n");
        return -1;
    }
    *out_data = NULL;
    *out_len = 0;

    if (images == NULL || num_images <= 0) {
        fprintf(stderr, "Error: Image array is empty.\n");
        return -1;
    }
    if (fps <= 0) {
        // fps is used as a divisor for the microseconds-per-frame field below.
        fprintf(stderr, "Error: Invalid frame rate: %d\n", fps);
        return -1;
    }

    const uint32_t width = images[0].width;
    const uint32_t height = images[0].height;
    const uint32_t channels = images[0].channel;
    if (channels != 3 && channels != 4) {
        fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
        return -1;
    }

    const uint32_t frame_count = static_cast<uint32_t>(num_images);
    const uint32_t raw_frame_bytes = width * height * 3;

    mem_buffer_t buf = {NULL, 0};
    avi_index_entry* index = NULL;

    // Single exit path for errors: report, release everything owned here.
    auto fail = [&](const char* reason) {
        fprintf(stderr, "Error: %s\n", reason);
        free(index);
        free(buf.data);
        return -1;
    };

    // Overwrite four already-written bytes with a little-endian 32-bit value.
    auto patch_u32_le = [&buf](size_t pos, uint32_t value) {
        for (int b = 0; b < 4; b++) {
            buf.data[pos + b] = static_cast<uint8_t>((value >> (8 * b)) & 0xFFu);
        }
    };

    // --- RIFF AVI Header ---
    mem_write(&buf, "RIFF", 4);
    const size_t riff_size_pos = buf.size;
    mem_write_u32_le(&buf, 0);  // placeholder, patched once the total size is known
    mem_write(&buf, "AVI ", 4);

    // 'hdrl' LIST: list type + avih chunk + nested strl LIST (strh + strf)
    mem_write(&buf, "LIST", 4);
    mem_write_u32_le(&buf, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
    mem_write(&buf, "hdrl", 4);

    // 'avih' main header (14 x 32-bit fields = 56 bytes)
    mem_write(&buf, "avih", 4);
    mem_write_u32_le(&buf, 56);
    mem_write_u32_le(&buf, static_cast<uint32_t>(1000000 / fps));  // microseconds per frame
    mem_write_u32_le(&buf, 0);                // max bytes per second
    mem_write_u32_le(&buf, 0);                // padding granularity
    mem_write_u32_le(&buf, 0x110);            // flags: has index | interleaved
    mem_write_u32_le(&buf, frame_count);      // total frames
    mem_write_u32_le(&buf, 0);                // initial frames
    mem_write_u32_le(&buf, 1);                // number of streams
    mem_write_u32_le(&buf, raw_frame_bytes);  // suggested buffer size
    mem_write_u32_le(&buf, width);
    mem_write_u32_le(&buf, height);
    for (int r = 0; r < 4; r++) {
        mem_write_u32_le(&buf, 0);            // reserved
    }

    // 'strl' LIST: list type + strh chunk + strf chunk
    mem_write(&buf, "LIST", 4);
    mem_write_u32_le(&buf, 4 + 8 + 56 + 8 + 40);
    mem_write(&buf, "strl", 4);

    // 'strh' stream header (56 bytes)
    mem_write(&buf, "strh", 4);
    mem_write_u32_le(&buf, 56);
    mem_write(&buf, "vids", 4);               // stream type: video
    mem_write(&buf, "MJPG", 4);               // codec handler
    mem_write_u32_le(&buf, 0);                // flags
    mem_write_u16_le(&buf, 0);                // priority
    mem_write_u16_le(&buf, 0);                // language
    mem_write_u32_le(&buf, 0);                // initial frames
    mem_write_u32_le(&buf, 1);                // scale
    mem_write_u32_le(&buf, static_cast<uint32_t>(fps));  // rate (rate / scale = fps)
    mem_write_u32_le(&buf, 0);                // start
    mem_write_u32_le(&buf, frame_count);      // length in frames
    mem_write_u32_le(&buf, raw_frame_bytes);  // suggested buffer size
    mem_write_u32_le(&buf, (uint32_t)-1);     // quality: -1
    mem_write_u32_le(&buf, 0);                // sample size
    for (int r = 0; r < 4; r++) {
        mem_write_u16_le(&buf, 0);            // frame rectangle (left, top, right, bottom)
    }

    // 'strf' stream format (40-byte bitmap info header)
    mem_write(&buf, "strf", 4);
    mem_write_u32_le(&buf, 40);
    mem_write_u32_le(&buf, 40);               // header size
    mem_write_u32_le(&buf, width);
    mem_write_u32_le(&buf, height);
    mem_write_u16_le(&buf, 1);                // planes
    mem_write_u16_le(&buf, 24);               // bits per pixel
    mem_write(&buf, "MJPG", 4);               // compression
    mem_write_u32_le(&buf, raw_frame_bytes);  // image size
    for (int r = 0; r < 4; r++) {
        mem_write_u32_le(&buf, 0);            // x/y pixels per meter, colors used, colors important
    }

    // 'movi' LIST
    mem_write(&buf, "LIST", 4);
    const size_t movi_size_pos = buf.size;
    mem_write_u32_le(&buf, 0);  // placeholder, patched after the frames are written
    mem_write(&buf, "movi", 4);

    // Everything written so far has a fixed size:
    // 12 (RIFF header) + 8 + 192 (hdrl LIST) + 12 (movi LIST header) = 224.
    // A different size means one of the appends above did not take place.
    const size_t header_bytes = 224;
    if (buf.data == NULL || buf.size != header_bytes) {
        return fail("Out of memory while writing AVI header.");
    }

    index = static_cast<avi_index_entry*>(malloc(sizeof(avi_index_entry) * static_cast<size_t>(num_images)));
    if (index == NULL) {
        return fail("Out of memory while allocating AVI index.");
    }

    // Accumulates the bytes stbi hands to the write callback for one frame.
    struct jpeg_sink {
        uint8_t* buf;
        size_t size;
        bool failed;
    };
    auto append_jpeg = [](void* context, void* data, int size) {
        jpeg_sink* sink = static_cast<jpeg_sink*>(context);
        if (sink->failed || size <= 0) {
            return;
        }
        void* grown = realloc(sink->buf, sink->size + static_cast<size_t>(size));
        if (grown == NULL) {
            sink->failed = true;  // keep the old block so it can still be freed
            return;
        }
        sink->buf = static_cast<uint8_t*>(grown);
        memcpy(sink->buf + sink->size, data, static_cast<size_t>(size));
        sink->size += static_cast<size_t>(size);
    };

    // Encode and write each frame
    for (int i = 0; i < num_images; i++) {
        // The encoder is told every frame has `channels` components, so a frame
        // with a different channel count would be read with the wrong stride.
        if (images[i].data == NULL || images[i].channel != channels) {
            return fail("Frame is missing pixel data or has a mismatched channel count.");
        }

        jpeg_sink jpeg = {NULL, 0, false};
        const int encoded = stbi_write_jpg_to_func(
            append_jpeg, &jpeg,
            images[i].width, images[i].height,
            channels, images[i].data, quality
        );
        if (!encoded || jpeg.failed || jpeg.size == 0) {
            free(jpeg.buf);
            return fail("Failed to JPEG-encode frame.");
        }

        const size_t chunk_start = buf.size;
        const size_t padding = jpeg.size % 2;  // chunks are padded to an even length

        mem_write(&buf, "00dc", 4);
        mem_write_u32_le(&buf, static_cast<uint32_t>(jpeg.size));
        // NOTE(review): the offset is measured from the start of the buffer (the
        // position of this chunk's '00dc' tag), not from the 'movi' list. Confirm
        // the intended base against the AVI reference and the target players.
        index[i].offset = chunk_start;
        index[i].size = jpeg.size;
        mem_write(&buf, jpeg.buf, jpeg.size);
        if (padding) {
            mem_write(&buf, "\0", 1);
        }
        free(jpeg.buf);

        if (buf.size != chunk_start + 8 + jpeg.size + padding) {
            return fail("Out of memory while writing frame data.");
        }
    }

    // Size of the 'movi' list payload: everything after its size field so far.
    const size_t movi_bytes = buf.size - movi_size_pos - 4;

    // write idx1
    const size_t index_start = buf.size;
    mem_write(&buf, "idx1", 4);
    mem_write_u32_le(&buf, frame_count * 16);
    for (int i = 0; i < num_images; i++) {
        mem_write(&buf, "00dc", 4);
        mem_write_u32_le(&buf, 0x10);  // keyframe flag
        mem_write_u32_le(&buf, index[i].offset);
        mem_write_u32_le(&buf, index[i].size);
    }
    if (buf.size != index_start + 8 + 16 * static_cast<size_t>(num_images)) {
        return fail("Out of memory while writing AVI index.");
    }

    // RIFF sizes are 32-bit fields; refuse to emit a file whose size would wrap.
    const size_t riff_bytes = buf.size - riff_size_pos - 4;
    if (riff_bytes > 0xFFFFFFFFu) {
        return fail("AVI data exceeds the 4 GiB RIFF size limit.");
    }

    // finalize movi and RIFF sizes
    patch_u32_le(movi_size_pos, static_cast<uint32_t>(movi_bytes));
    patch_u32_le(riff_size_pos, static_cast<uint32_t>(riff_bytes));

    free(index);
    *out_data = buf.data;
    *out_len = buf.size;
    return 0;
}
#endif // __AVI_WRITER_H__

View file

@ -37,6 +37,8 @@
// #define STB_IMAGE_RESIZE_IMPLEMENTATION //already defined in llava
#include "stb_image_resize.h"
#include "avi_writer.h"
static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT,
"inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT");
@ -721,9 +723,12 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
params.pm_params.id_images = photomaker_imgs.data();
params.pm_params.id_images_count = photomaker_imgs.size();
//the below params are only used in video models. May move into standalone object in future
int vid_req_frames = inputs.vid_req_frames;
int generated_num_results = 1;
if(is_vid_model)
{
int num_results = 1;
std::vector<sd_image_t> control_frames; //empty for now
sd_vid_gen_params_t vid_gen_params = {};
sd_vid_gen_params_init (&vid_gen_params);
@ -737,8 +742,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
vid_gen_params.sample_params = params.sample_params;
vid_gen_params.strength = params.strength;
vid_gen_params.seed = params.seed;
vid_gen_params.video_frames = 1;
if(!sd_is_quiet && sddebugmode==1)
vid_gen_params.video_frames = vid_req_frames;
if(!sd_is_quiet && sddebugmode==1)
{
std::stringstream ss;
ss << "\nVID PROMPT:" << vid_gen_params.prompt
@ -755,7 +760,11 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
}
fflush(stdout);
results = generate_video(sd_ctx, &vid_gen_params, &num_results);
results = generate_video(sd_ctx, &vid_gen_params, &generated_num_results);
if(!sd_is_quiet && sddebugmode==1)
{
printf("\nRequested Vid Frames: %d, Generated Vid Frames: %d\n",vid_req_frames, generated_num_results);
}
}
else if (!is_img2img)
{
@ -906,12 +915,27 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
continue;
}
int out_data_len;
unsigned char * png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params).c_str());
if (png != NULL)
//if multiframe, make a video
if(vid_req_frames>1 && generated_num_results>1 && is_vid_model)
{
recent_data = kcpp_base64_encode(png,out_data_len);
free(png);
uint8_t * out_data = nullptr;
size_t out_len = 0;
int status = create_mjpg_avi_membuf_from_sd_images(results, generated_num_results, 24, 40, &out_data,&out_len);
if(status==0)
{
recent_data = kcpp_base64_encode(out_data, out_len);
free(out_data);
}
}
else
{
int out_data_len;
unsigned char * png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params).c_str());
if (png != NULL)
{
recent_data = kcpp_base64_encode(png,out_data_len);
free(png);
}
}
free(results[i].data);