mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Configurable VAE threshold limit (#1601)
* add backend support for changing the VAE tiling threshold
* trigger VAE tiling by image area instead of dimensions

  Using GGML_VULKAN_MEMORY_DEBUG, I've tested all resolutions with the same area as 768x768 (even extremes like 64x9216), and many below that: all consistently allocate 6656 bytes per image pixel. As tiling is primarily useful to avoid excessive memory usage, it seems reasonable to enable VAE tiling based on area rather than on the maximum image side. However, as there is currently no user interface option to change it back to a lower value, it's best to maintain the default behavior for now.
* replace the notile option with a configurable threshold

  This allows selecting a lower threshold value, reducing the peak memory usage. The legacy sdnotile parameter gets automatically converted to the new parameter, if it's the only one supplied.
* simplify tiling checks, 768 default visible in launcher

---------

Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
This commit is contained in:
parent
caea52407a
commit
08adfb53c9
3 changed files with 19 additions and 12 deletions
2
expose.h
2
expose.h
|
@ -162,7 +162,7 @@ struct sd_load_model_inputs
|
|||
const int threads = 0;
|
||||
const int quant = 0;
|
||||
const bool taesd = false;
|
||||
const bool notile = false;
|
||||
const int tiled_vae_threshold = 0;
|
||||
const char * t5xxl_filename = nullptr;
|
||||
const char * clipl_filename = nullptr;
|
||||
const char * clipg_filename = nullptr;
|
||||
|
|
20
koboldcpp.py
20
koboldcpp.py
|
@ -52,6 +52,7 @@ default_ttsmaxlen = 4096
|
|||
default_visionmaxres = 1024
|
||||
net_save_slots = 10
|
||||
savestate_limit = 3 #3 savestate slots
|
||||
default_vae_tile_threshold = 768
|
||||
|
||||
# abuse prevention
|
||||
stop_token_max = 256
|
||||
|
@ -272,7 +273,7 @@ class sd_load_model_inputs(ctypes.Structure):
|
|||
("threads", ctypes.c_int),
|
||||
("quant", ctypes.c_int),
|
||||
("taesd", ctypes.c_bool),
|
||||
("notile", ctypes.c_bool),
|
||||
("tiled_vae_threshold", ctypes.c_int),
|
||||
("t5xxl_filename", ctypes.c_char_p),
|
||||
("clipl_filename", ctypes.c_char_p),
|
||||
("clipg_filename", ctypes.c_char_p),
|
||||
|
@ -1549,7 +1550,7 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl
|
|||
inputs.threads = thds
|
||||
inputs.quant = quant
|
||||
inputs.taesd = True if args.sdvaeauto else False
|
||||
inputs.notile = True if args.sdnotile else False
|
||||
inputs.tiled_vae_threshold = args.sdtiledvae
|
||||
inputs.vae_filename = vae_filename.encode("UTF-8")
|
||||
inputs.lora_filename = lora_filename.encode("UTF-8")
|
||||
inputs.lora_multiplier = args.sdloramult
|
||||
|
@ -4303,7 +4304,7 @@ def show_gui():
|
|||
sd_clipg_var = ctk.StringVar()
|
||||
sd_photomaker_var = ctk.StringVar()
|
||||
sd_vaeauto_var = ctk.IntVar(value=0)
|
||||
sd_notile_var = ctk.IntVar(value=0)
|
||||
sd_tiled_vae_var = ctk.StringVar(value=str(default_vae_tile_threshold))
|
||||
sd_clamped_var = ctk.StringVar(value="0")
|
||||
sd_clamped_soft_var = ctk.StringVar(value="0")
|
||||
sd_threads_var = ctk.StringVar(value=str(default_threads))
|
||||
|
@ -5033,7 +5034,7 @@ def show_gui():
|
|||
sdvaeitem2.grid()
|
||||
sdvaeitem3.grid()
|
||||
makecheckbox(images_tab, "Use TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 42,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.")
|
||||
makecheckbox(images_tab, "No VAE Tiling", sd_notile_var, 44,tooltiptxt="Disables VAE tiling, may not work for large images.")
|
||||
makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=144,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.")
|
||||
|
||||
# audio tab
|
||||
audio_tab = tabcontent["Audio"]
|
||||
|
@ -5266,7 +5267,7 @@ def show_gui():
|
|||
args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get()))
|
||||
args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get()))
|
||||
args.sdclampedsoft = (0 if int(sd_clamped_soft_var.get())<=0 else int(sd_clamped_soft_var.get()))
|
||||
args.sdnotile = (True if sd_notile_var.get()==1 else False)
|
||||
args.sdtiledvae = (default_vae_tile_threshold if sd_tiled_vae_var.get()=="" else int(sd_tiled_vae_var.get()))
|
||||
if sd_vaeauto_var.get()==1:
|
||||
args.sdvaeauto = True
|
||||
args.sdvae = ""
|
||||
|
@ -5488,7 +5489,8 @@ def show_gui():
|
|||
sd_clipg_var.set(dict["sdclipg"] if ("sdclipg" in dict and dict["sdclipg"]) else "")
|
||||
sd_photomaker_var.set(dict["sdphotomaker"] if ("sdphotomaker" in dict and dict["sdphotomaker"]) else "")
|
||||
sd_vaeauto_var.set(1 if ("sdvaeauto" in dict and dict["sdvaeauto"]) else 0)
|
||||
sd_notile_var.set(1 if ("sdnotile" in dict and dict["sdnotile"]) else 0)
|
||||
sd_tiled_vae_var.set(str(dict["sdtiledvae"]) if ("sdtiledvae" in dict and dict["sdtiledvae"]) else str(default_vae_tile_threshold))
|
||||
|
||||
sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "")
|
||||
sd_loramult_var.set(str(dict["sdloramult"]) if ("sdloramult" in dict and dict["sdloramult"]) else "1.0")
|
||||
|
||||
|
@ -5856,6 +5858,8 @@ def convert_invalid_args(args):
|
|||
dict["model_param"] = model_value
|
||||
elif isinstance(model_value, list) and model_value: # Non-empty list
|
||||
dict["model_param"] = model_value[0] # Take the first file in the list
|
||||
if "sdnotile" in dict and "sdtiledvae" not in dict:
|
||||
dict["sdtiledvae"] = (0 if (dict["sdnotile"]) else default_vae_tile_threshold) # convert legacy option
|
||||
return args
|
||||
|
||||
def setuptunnel(global_memory, has_sd):
|
||||
|
@ -7269,8 +7273,7 @@ if __name__ == '__main__':
|
|||
sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true')
|
||||
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify an image generation LORA safetensors model to be applied.", default="")
|
||||
sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LORA model to be applied.", type=float, default=1.0)
|
||||
sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')
|
||||
|
||||
sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 0 disables vae tiling.", type=int, default=default_vae_tile_threshold)
|
||||
whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
|
||||
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="")
|
||||
|
||||
|
@ -7296,5 +7299,6 @@ if __name__ == '__main__':
|
|||
deprecatedgroup.add_argument("--sdconfig", help=argparse.SUPPRESS, nargs='+')
|
||||
compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
|
||||
compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true')
|
||||
deprecatedgroup.add_argument("--sdnotile", help=argparse.SUPPRESS, action='store_true') # legacy option, see sdtiledvae
|
||||
|
||||
main(launch_args=parser.parse_args(),default_args=parser.parse_args([]))
|
||||
|
|
|
@ -119,7 +119,7 @@ static uint8_t * input_mask_buffer = NULL;
|
|||
static uint8_t * input_photomaker_buffer = NULL;
|
||||
|
||||
static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv;
|
||||
static bool notiling = false;
|
||||
static int cfg_tiled_vae_threshold = 0;
|
||||
static int cfg_square_limit = 0;
|
||||
static int cfg_side_limit = 0;
|
||||
static bool sd_is_quiet = false;
|
||||
|
@ -137,7 +137,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
std::string clipl_filename = inputs.clipl_filename;
|
||||
std::string clipg_filename = inputs.clipg_filename;
|
||||
std::string photomaker_filename = inputs.photomaker_filename;
|
||||
notiling = inputs.notile;
|
||||
cfg_tiled_vae_threshold = inputs.tiled_vae_threshold;
|
||||
cfg_tiled_vae_threshold = (cfg_tiled_vae_threshold > 8192 ? 8192 : cfg_tiled_vae_threshold);
|
||||
cfg_tiled_vae_threshold = (cfg_tiled_vae_threshold <= 0 ? 8192 : cfg_tiled_vae_threshold); //if negative dont tile
|
||||
cfg_side_limit = inputs.img_hard_limit;
|
||||
cfg_square_limit = inputs.img_soft_limit;
|
||||
printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename);
|
||||
|
@ -489,7 +491,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
printf("\nKCPP SD: Requested dimensions %dx%d changed to %dx%d\n", inputs.width, inputs.height, sd_params->width, sd_params->height);
|
||||
}
|
||||
|
||||
bool dotile = (sd_params->width>768 || sd_params->height>768) && !notiling;
|
||||
// trigger tiling by image area, the memory used for the VAE buffer is 6656 bytes per image pixel, default 768x768
|
||||
bool dotile = (sd_params->width*sd_params->height > cfg_tiled_vae_threshold*cfg_tiled_vae_threshold);
|
||||
set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom
|
||||
|
||||
if (sd_params->clip_skip <= 0) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue