From 7b71742223defdab20e2d606a2f3485f89d5cc23 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 28 Sep 2025 22:08:29 +0800 Subject: [PATCH] added additional toggles for SD, sdoffloadcpu , sdvaecpu and sdclipcpu --- expose.h | 3 +++ koboldcpp.py | 24 ++++++++++++++++++++++++ otherarch/sdcpp/sdtype_adapter.cpp | 3 +++ 3 files changed, 30 insertions(+) diff --git a/expose.h b/expose.h index ef75c0dab..ba6df5ce2 100644 --- a/expose.h +++ b/expose.h @@ -167,6 +167,9 @@ struct sd_load_model_inputs const int threads = 0; const int quant = 0; const bool flash_attention = false; + const bool offload_cpu = false; + const bool vae_cpu = false; + const bool clip_cpu = false; const bool diffusion_conv_direct = false; const bool vae_conv_direct = false; const bool taesd = false; diff --git a/koboldcpp.py b/koboldcpp.py index b9b37fcd9..5efd063c5 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -282,6 +282,9 @@ class sd_load_model_inputs(ctypes.Structure): ("threads", ctypes.c_int), ("quant", ctypes.c_int), ("flash_attention", ctypes.c_bool), + ("offload_cpu", ctypes.c_bool), + ("vae_cpu", ctypes.c_bool), + ("clip_cpu", ctypes.c_bool), ("diffusion_conv_direct", ctypes.c_bool), ("vae_conv_direct", ctypes.c_bool), ("taesd", ctypes.c_bool), @@ -1709,6 +1712,9 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl inputs.threads = thds inputs.quant = args.sdquant inputs.flash_attention = args.sdflashattention + inputs.offload_cpu = args.sdoffloadcpu + inputs.vae_cpu = args.sdvaecpu + inputs.clip_cpu = args.sdclipcpu sdconvdirect = sd_convdirect_option(args.sdconvdirect) inputs.diffusion_conv_direct = sdconvdirect == 'full' inputs.vae_conv_direct = sdconvdirect in ['vaeonly', 'full'] @@ -4650,6 +4656,9 @@ def show_gui(): sd_clipg_var = ctk.StringVar() sd_photomaker_var = ctk.StringVar() sd_flash_attention_var = ctk.IntVar(value=0) + sd_offload_cpu_var = ctk.IntVar(value=0) + sd_vae_cpu_var = ctk.IntVar(value=0) + sd_clip_cpu_var = ctk.IntVar(value=0) sd_vaeauto_var = ctk.IntVar(value=0) sd_tiled_vae_var = ctk.StringVar(value=str(default_vae_tile_threshold)) sd_convdirect_var = ctk.StringVar(value=str(sd_convdirect_choices[0])) @@ -5429,6 +5438,9 @@ def show_gui(): makelabelcombobox(images_tab, "Conv2D Direct:", sd_convdirect_var, row=42, labelpadx=220, padx=310, width=90, tooltiptxt="Use Conv2D Direct operation. May save memory or improve performance.\nMight crash if not supported by the backend.\n", values=sd_convdirect_choices) makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=144,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.") makecheckbox(images_tab, "SD Flash Attention", sd_flash_attention_var, 44,padx=230, tooltiptxt="Enable Flash Attention for image diffusion. May save memory or improve performance.") + makecheckbox(images_tab, "Model CPU Offload", sd_offload_cpu_var, 50,padx=8, tooltiptxt="Offload image weights in RAM to save VRAM, swap into VRAM when needed.") + makecheckbox(images_tab, "VAE on CPU", sd_vae_cpu_var, 50,padx=160, tooltiptxt="Force VAE to CPU only for image generation.") + makecheckbox(images_tab, "CLIP on CPU", sd_clip_cpu_var, 50,padx=280, tooltiptxt="Force CLIP to CPU only for image generation.") # audio tab audio_tab = tabcontent["Audio"] @@ -5669,6 +5681,12 @@ def show_gui(): if sd_flash_attention_var.get()==1: args.sdflashattention = True + if sd_offload_cpu_var.get()==1: + args.sdoffloadcpu = True + if sd_vae_cpu_var.get()==1: + args.sdvaecpu = True + if sd_clip_cpu_var.get()==1: + args.sdclipcpu = True args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get())) args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get())) args.sdclampedsoft = (0 if int(sd_clamped_soft_var.get())<=0 else int(sd_clamped_soft_var.get())) @@ -5908,6 +5926,9 @@ def show_gui(): sd_threads_var.set(str(dict["sdthreads"]) if ("sdthreads" in dict and dict["sdthreads"]) else str(default_threads)) sd_quant_var.set(sd_quant_choices[(dict["sdquant"] if ("sdquant" in dict and dict["sdquant"]>=0 and dict["sdquant"]diffusion_conv_direct; params.vae_conv_direct = sd_params->vae_conv_direct; params.chroma_use_dit_mask = sd_params->chroma_use_dit_mask; + params.offload_params_to_cpu = inputs.offload_cpu; + params.keep_vae_on_cpu = inputs.vae_cpu; + params.keep_clip_on_cpu = inputs.clip_cpu; if (params.chroma_use_dit_mask && params.diffusion_flash_attn) { // note we don't know yet if it's a Chroma model