diff --git a/expose.h b/expose.h index c9d59d25f..25527ed1c 100644 --- a/expose.h +++ b/expose.h @@ -162,7 +162,7 @@ struct sd_load_model_inputs const int threads = 0; const int quant = 0; const bool taesd = false; - const bool notile = false; + const int tiled_vae_threshold = 0; const char * t5xxl_filename = nullptr; const char * clipl_filename = nullptr; const char * clipg_filename = nullptr; diff --git a/koboldcpp.py b/koboldcpp.py index 406cdeff6..8ac5de215 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -52,6 +52,7 @@ default_ttsmaxlen = 4096 default_visionmaxres = 1024 net_save_slots = 10 savestate_limit = 3 #3 savestate slots +default_vae_tile_threshold = 768 # abuse prevention stop_token_max = 256 @@ -272,7 +273,7 @@ class sd_load_model_inputs(ctypes.Structure): ("threads", ctypes.c_int), ("quant", ctypes.c_int), ("taesd", ctypes.c_bool), - ("notile", ctypes.c_bool), + ("tiled_vae_threshold", ctypes.c_int), ("t5xxl_filename", ctypes.c_char_p), ("clipl_filename", ctypes.c_char_p), ("clipg_filename", ctypes.c_char_p), @@ -1549,7 +1550,7 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl inputs.threads = thds inputs.quant = quant inputs.taesd = True if args.sdvaeauto else False - inputs.notile = True if args.sdnotile else False + inputs.tiled_vae_threshold = args.sdtiledvae inputs.vae_filename = vae_filename.encode("UTF-8") inputs.lora_filename = lora_filename.encode("UTF-8") inputs.lora_multiplier = args.sdloramult @@ -4303,7 +4304,7 @@ def show_gui(): sd_clipg_var = ctk.StringVar() sd_photomaker_var = ctk.StringVar() sd_vaeauto_var = ctk.IntVar(value=0) - sd_notile_var = ctk.IntVar(value=0) + sd_tiled_vae_var = ctk.StringVar(value=str(default_vae_tile_threshold)) sd_clamped_var = ctk.StringVar(value="0") sd_clamped_soft_var = ctk.StringVar(value="0") sd_threads_var = ctk.StringVar(value=str(default_threads)) @@ -5033,7 +5034,7 @@ def show_gui(): sdvaeitem2.grid() sdvaeitem3.grid() makecheckbox(images_tab, "Use TAE SD (AutoFix 
Broken VAE)", sd_vaeauto_var, 42,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.") - makecheckbox(images_tab, "No VAE Tiling", sd_notile_var, 44,tooltiptxt="Disables VAE tiling, may not work for large images.") + makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=144,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.") # audio tab audio_tab = tabcontent["Audio"] @@ -5266,7 +5267,7 @@ def show_gui(): args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get())) args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get())) args.sdclampedsoft = (0 if int(sd_clamped_soft_var.get())<=0 else int(sd_clamped_soft_var.get())) - args.sdnotile = (True if sd_notile_var.get()==1 else False) + args.sdtiledvae = (default_vae_tile_threshold if sd_tiled_vae_var.get()=="" else int(sd_tiled_vae_var.get())) if sd_vaeauto_var.get()==1: args.sdvaeauto = True args.sdvae = "" @@ -5488,7 +5489,8 @@ def show_gui(): sd_clipg_var.set(dict["sdclipg"] if ("sdclipg" in dict and dict["sdclipg"]) else "") sd_photomaker_var.set(dict["sdphotomaker"] if ("sdphotomaker" in dict and dict["sdphotomaker"]) else "") sd_vaeauto_var.set(1 if ("sdvaeauto" in dict and dict["sdvaeauto"]) else 0) - sd_notile_var.set(1 if ("sdnotile" in dict and dict["sdnotile"]) else 0) + sd_tiled_vae_var.set(str(dict["sdtiledvae"]) if ("sdtiledvae" in dict and dict["sdtiledvae"] is not None) else str(default_vae_tile_threshold)) # explicit None check: a saved 0 (tiling disabled) must not fall back to the default + sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "") sd_loramult_var.set(str(dict["sdloramult"]) if ("sdloramult" in dict and dict["sdloramult"]) else "1.0") @@ -5856,6 +5858,8 @@ def convert_invalid_args(args): dict["model_param"] = model_value elif isinstance(model_value, list) and model_value: # Non-empty list dict["model_param"] = model_value[0] # Take the first file in the list + if "sdnotile" in dict and 
"sdtiledvae" not in dict: + dict["sdtiledvae"] = (0 if (dict["sdnotile"]) else default_vae_tile_threshold) # convert legacy option return args def setuptunnel(global_memory, has_sd): @@ -7269,8 +7273,7 @@ if __name__ == '__main__': sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true') sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify an image generation LORA safetensors model to be applied.", default="") sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LORA model to be applied.", type=float, default=1.0) - sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true') - + sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 0 disables vae tiling.", type=int, default=default_vae_tile_threshold) whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands') whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="") @@ -7296,5 +7299,6 @@ if __name__ == '__main__': deprecatedgroup.add_argument("--sdconfig", help=argparse.SUPPRESS, nargs='+') compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true') compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true') + deprecatedgroup.add_argument("--sdnotile", help=argparse.SUPPRESS, action='store_true') # legacy option, see sdtiledvae main(launch_args=parser.parse_args(),default_args=parser.parse_args([])) diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index 15c73ffdd..9170ada47 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -119,7 +119,7 @@ static uint8_t * input_mask_buffer = NULL; 
static uint8_t * input_photomaker_buffer = NULL; static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv; -static bool notiling = false; +static int cfg_tiled_vae_threshold = 0; static int cfg_square_limit = 0; static int cfg_side_limit = 0; static bool sd_is_quiet = false; @@ -137,7 +137,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { std::string clipl_filename = inputs.clipl_filename; std::string clipg_filename = inputs.clipg_filename; std::string photomaker_filename = inputs.photomaker_filename; - notiling = inputs.notile; + cfg_tiled_vae_threshold = inputs.tiled_vae_threshold; + cfg_tiled_vae_threshold = (cfg_tiled_vae_threshold > 8192 ? 8192 : cfg_tiled_vae_threshold); + cfg_tiled_vae_threshold = (cfg_tiled_vae_threshold <= 0 ? 8192 : cfg_tiled_vae_threshold); //zero or negative means "do not tile": substitute the 8192 cap so the area check only fires above 8192x8192 cfg_side_limit = inputs.img_hard_limit; cfg_square_limit = inputs.img_soft_limit; printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename); @@ -489,7 +491,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) printf("\nKCPP SD: Requested dimensions %dx%d changed to %dx%d\n", inputs.width, inputs.height, sd_params->width, sd_params->height); } - bool dotile = (sd_params->width>768 || sd_params->height>768) && !notiling; + // trigger tiling by image area (width*height above the threshold squared), the memory used for the VAE buffer is 6656 bytes per image pixel, default 768x768 + bool dotile = (sd_params->width*sd_params->height > cfg_tiled_vae_threshold*cfg_tiled_vae_threshold); set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom if (sd_params->clip_skip <= 0) {