diff --git a/expose.h b/expose.h index 9967bfdca..ef75c0dab 100644 --- a/expose.h +++ b/expose.h @@ -200,6 +200,7 @@ struct sd_generation_inputs const int seed = 0; const char * sample_method = nullptr; const int clip_skip = -1; + const int vid_req_frames = 1; }; struct sd_generation_outputs { diff --git a/koboldcpp.py b/koboldcpp.py index 88c2e7d5b..b9b37fcd9 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -313,7 +313,8 @@ class sd_generation_inputs(ctypes.Structure): ("height", ctypes.c_int), ("seed", ctypes.c_int), ("sample_method", ctypes.c_char_p), - ("clip_skip", ctypes.c_int)] + ("clip_skip", ctypes.c_int), + ("vid_req_frames", ctypes.c_int)] class sd_generation_outputs(ctypes.Structure): _fields_ = [("status", ctypes.c_int), @@ -1815,6 +1816,8 @@ def sd_generate(genparams): seed = random.randint(100000, 999999) sample_method = genparams.get("sampler_name", "k_euler_a") clip_skip = tryparseint(genparams.get("clip_skip", -1),-1) + vid_req_frames = tryparseint(genparams.get("frames", 1),1) + vid_req_frames = 1 if (not vid_req_frames or vid_req_frames < 1) else vid_req_frames extra_images_arr = genparams.get("extra_images", []) extra_images_arr = ([] if not extra_images_arr else extra_images_arr) extra_images_arr = [img for img in extra_images_arr if img not in (None, "")] @@ -1846,6 +1849,7 @@ def sd_generate(genparams): inputs.seed = seed inputs.sample_method = sample_method.lower().encode("UTF-8") inputs.clip_skip = clip_skip + inputs.vid_req_frames = vid_req_frames ret = handle.sd_generate(inputs) outstr = "" if ret.status==1: @@ -5397,13 +5401,13 @@ def show_gui(): makefileentry(images_tab, "Image Gen. Model (safetensors/gguf):", "Select Image Gen Model File", sd_model_var, 1, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")], tooltiptxt="Select a .safetensors or .gguf Image Generation model file on disk to be loaded.") makelabelentry(images_tab, "Clamp Resolution Limit (Hard):", sd_clamped_var, 4, 50, padx=190,singleline=True,tooltip="Limit generation steps and output image size for shared use.\nSet to 0 to disable, otherwise value is clamped to the max size limit (min 512px).") makelabelentry(images_tab, "(Soft):", sd_clamped_soft_var, 4, 50, padx=290,singleline=True,tooltip="Square image size restriction, to protect the server against memory crashes.\nAllows width-height tradeoffs, eg. 640 allows 640x640 and 512x768\nLeave at 0 for the default value: 832 for SD1.5/SD2, 1024 otherwise.",labelpadx=250) - makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.") + makelabelentry(images_tab, "ImgThreads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.",labelpadx=210) sd_model_var.trace_add("write", gui_changed_modelfile) - makelabelcombobox(images_tab, "Compress Weights (Saves Memory): ", sd_quant_var, 10, width=60, padx=220, labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices) + makelabelcombobox(images_tab, "Compress Weights: ", sd_quant_var, 8, width=60, padx=126, labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices) sd_quant_var.trace_add("write", changed_gpulayers_estimate) - makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 20, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!") - makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 22, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.") + makefileentry(images_tab, "Image LoRA:", "Select SD lora file",sd_lora_var, 20, width=160, singlerow=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!") + makelabelentry(images_tab, "Multiplier:" , sd_loramult_var, 20, 50,padx=390,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.",labelpadx=330) makefileentry(images_tab, "T5-XXL File:", "Select Optional T5-XXL model file (SD3 or flux)",sd_t5xxl_var, 24, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.") makefileentry(images_tab, "Clip-L File:", "Select Optional Clip-L model file (SD3 or flux)",sd_clipl_var, 26, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.") @@ -5424,7 +5428,7 @@ def show_gui(): makecheckbox(images_tab, "TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 42,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.") makelabelcombobox(images_tab, "Conv2D Direct:", sd_convdirect_var, row=42, labelpadx=220, padx=310, width=90, tooltiptxt="Use Conv2D Direct operation. May save memory or improve performance.\nMight crash if not supported by the backend.\n", values=sd_convdirect_choices) makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=144,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.") - makecheckbox(images_tab, "SD Flash Attention", sd_flash_attention_var, 46, tooltiptxt="Enable Flash Attention for image diffusion. May save memory or improve performance.") + makecheckbox(images_tab, "SD Flash Attention", sd_flash_attention_var, 44,padx=230, tooltiptxt="Enable Flash Attention for image diffusion. May save memory or improve performance.") # audio tab audio_tab = tabcontent["Audio"] diff --git a/otherarch/sdcpp/avi_writer.h b/otherarch/sdcpp/avi_writer.h index 8cfb9a570..7d46865b6 100644 --- a/otherarch/sdcpp/avi_writer.h +++ b/otherarch/sdcpp/avi_writer.h @@ -214,4 +214,180 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int return 0; } + +//// KCPP PART FOR WRITING AVI TO MEMORY + +typedef struct { + uint8_t* data; + size_t size; +} mem_buffer_t; + +// Append raw bytes +static void mem_write(mem_buffer_t* buf, const void* data, size_t size) { + buf->data = (uint8_t*)realloc(buf->data, buf->size + size); + memcpy(buf->data + buf->size, data, size); + buf->size += size; +} + +// Write 32-bit LE +static void mem_write_u32_le(mem_buffer_t* buf, uint32_t val) { + mem_write(buf, &val, 4); +} + +// Write 16-bit LE +static void mem_write_u16_le(mem_buffer_t* buf, uint16_t val) { + mem_write(buf, &val, 2); +} + +/** + * Create MJPG AVI file in memory and return as base64 string. + * Returns 0 on success, -1 on failure + * must be freed by caller after use + */ +int create_mjpg_avi_membuf_from_sd_images(sd_image_t* images, int num_images, int fps, int quality, uint8_t** out_data, size_t *out_len) +{ + if (num_images == 0) { + fprintf(stderr, "Error: Image array is empty.\n"); + return -1; + } + + mem_buffer_t buf = {NULL, 0}; + uint32_t width = images[0].width; + uint32_t height = images[0].height; + uint32_t channels = images[0].channel; + + if (channels != 3 && channels != 4) { + fprintf(stderr, "Error: Unsupported channel count: %u\n", channels); + return -1; + } + + // --- RIFF AVI Header --- + mem_write(&buf, "RIFF", 4); + size_t riff_size_pos = buf.size; + mem_write_u32_le(&buf, 0); // placeholder + mem_write(&buf, "AVI ", 4); + + // 'hdrl' LIST + mem_write(&buf, "LIST", 4); + mem_write_u32_le(&buf, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); + mem_write(&buf, "hdrl", 4); + + // 'avih' + mem_write(&buf, "avih", 4); + mem_write_u32_le(&buf, 56); + mem_write_u32_le(&buf, 1000000 / fps); + mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, 0x110); + mem_write_u32_le(&buf, num_images); + mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, 1); + mem_write_u32_le(&buf, width * height * 3); + mem_write_u32_le(&buf, width); + mem_write_u32_le(&buf, height); + mem_write_u32_le(&buf, 0); mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, 0); mem_write_u32_le(&buf, 0); + + // 'strl' LIST + mem_write(&buf, "LIST", 4); + mem_write_u32_le(&buf, 4 + 8 + 56 + 8 + 40); + mem_write(&buf, "strl", 4); + + // 'strh' + mem_write(&buf, "strh", 4); + mem_write_u32_le(&buf, 56); + mem_write(&buf, "vids", 4); + mem_write(&buf, "MJPG", 4); + mem_write_u32_le(&buf, 0); + mem_write_u16_le(&buf, 0); + mem_write_u16_le(&buf, 0); + mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, 1); + mem_write_u32_le(&buf, fps); + mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, num_images); + mem_write_u32_le(&buf, width * height * 3); + mem_write_u32_le(&buf, (uint32_t)-1); + mem_write_u32_le(&buf, 0); + mem_write_u16_le(&buf, 0); mem_write_u16_le(&buf, 0); + mem_write_u16_le(&buf, 0); mem_write_u16_le(&buf, 0); + + // 'strf' + mem_write(&buf, "strf", 4); + mem_write_u32_le(&buf, 40); + mem_write_u32_le(&buf, 40); + mem_write_u32_le(&buf, width); + mem_write_u32_le(&buf, height); + mem_write_u16_le(&buf, 1); + mem_write_u16_le(&buf, 24); + mem_write(&buf, "MJPG", 4); + mem_write_u32_le(&buf, width * height * 3); + mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, 0); + mem_write_u32_le(&buf, 0); + + // 'movi' LIST + mem_write(&buf, "LIST", 4); + size_t movi_size_pos = buf.size; + mem_write_u32_le(&buf, 0); + mem_write(&buf, "movi", 4); + + avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images); + + // Encode and write each frame + for (int i = 0; i < num_images; i++) { + struct { + uint8_t* buf; + size_t size; + } jpeg_data = {NULL, 0}; + + auto write_to_buf = [](void* context, void* data, int size) { + auto jd = (decltype(jpeg_data)*)context; + jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size); + memcpy(jd->buf + jd->size, data, size); + jd->size += size; + }; + + stbi_write_jpg_to_func( + write_to_buf, &jpeg_data, + images[i].width, images[i].height, + channels, images[i].data, quality + ); + + mem_write(&buf, "00dc", 4); + mem_write_u32_le(&buf, jpeg_data.size); + index[i].offset = buf.size - 8; + index[i].size = jpeg_data.size; + mem_write(&buf, jpeg_data.buf, jpeg_data.size); + if (jpeg_data.size % 2) mem_write(&buf, "\0", 1); + + free(jpeg_data.buf); + } + + // finalize movi size + uint32_t movi_size = buf.size - movi_size_pos - 4; + memcpy(buf.data + movi_size_pos, &movi_size, 4); + + // write idx1 + mem_write(&buf, "idx1", 4); + mem_write_u32_le(&buf, num_images * 16); + for (int i = 0; i < num_images; i++) { + mem_write(&buf, "00dc", 4); + mem_write_u32_le(&buf, 0x10); + mem_write_u32_le(&buf, index[i].offset); + mem_write_u32_le(&buf, index[i].size); + } + + // finalize RIFF size + uint32_t riff_size = buf.size - riff_size_pos - 4; + memcpy(buf.data + riff_size_pos, &riff_size, 4); + + free(index); + + *out_data = buf.data; + *out_len = buf.size; + return 0; +} + #endif // __AVI_WRITER_H__ \ No newline at end of file diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index 2e4f8679d..a0b2e7fd3 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -37,6 +37,8 @@ // #define STB_IMAGE_RESIZE_IMPLEMENTATION //already defined in llava #include "stb_image_resize.h" +#include "avi_writer.h" + static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT, "inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT"); @@ -721,9 +723,12 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) params.pm_params.id_images = photomaker_imgs.data(); params.pm_params.id_images_count = photomaker_imgs.size(); + //the below params are only used in video models. May move into standalone object in future + int vid_req_frames = inputs.vid_req_frames; + int generated_num_results = 1; + if(is_vid_model) { - int num_results = 1; std::vector control_frames; //empty for now sd_vid_gen_params_t vid_gen_params = {}; sd_vid_gen_params_init (&vid_gen_params); @@ -737,8 +742,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) vid_gen_params.sample_params = params.sample_params; vid_gen_params.strength = params.strength; vid_gen_params.seed = params.seed; - vid_gen_params.video_frames = 1; - if(!sd_is_quiet && sddebugmode==1) + vid_gen_params.video_frames = vid_req_frames; + if(!sd_is_quiet && sddebugmode==1) { std::stringstream ss; ss << "\nVID PROMPT:" << vid_gen_params.prompt @@ -755,7 +760,11 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) } fflush(stdout); - results = generate_video(sd_ctx, &vid_gen_params, &num_results); + results = generate_video(sd_ctx, &vid_gen_params, &generated_num_results); + if(!sd_is_quiet && sddebugmode==1) + { + printf("\nRequested Vid Frames: %d, Generated Vid Frames: %d\n",vid_req_frames, generated_num_results); + } } else if (!is_img2img) { @@ -906,12 +915,27 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) continue; } - int out_data_len; - unsigned char * png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params).c_str()); - if (png != NULL) + //if multiframe, make a video + if(vid_req_frames>1 && generated_num_results>1 && is_vid_model) { - recent_data = kcpp_base64_encode(png,out_data_len); - free(png); + uint8_t * out_data = nullptr; + size_t out_len = 0; + int status = create_mjpg_avi_membuf_from_sd_images(results, generated_num_results, 24, 40, &out_data,&out_len); + if(status==0) + { + recent_data = kcpp_base64_encode(out_data, out_len); + free(out_data); + } + } + else + { + int out_data_len; + unsigned char * png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params).c_str()); + if (png != NULL) + { + recent_data = kcpp_base64_encode(png,out_data_len); + free(png); + } } free(results[i].data);