From 80dabbb689d414cf63d1217fccab4f185506e6cd Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Fri, 22 Aug 2025 23:23:32 +0800 Subject: [PATCH] minor adjustments for sdquant: allow backend to do the translation for the type more defensively, adjust the UI dropdown for clarity. --- koboldcpp.py | 17 ++++++++++------- otherarch/sdcpp/sdtype_adapter.cpp | 6 +++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 9870648ed..05c479bdf 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1657,10 +1657,14 @@ def sd_convdirect_option(value): return 'full' raise argparse.ArgumentTypeError(f"Invalid sdconvdirect option \"{value}\". Must be one of {sd_convdirect_choices}.") -sd_quant_choices = ['0 (off)', '1 (q8_0)', '2 (q4_0)'] +sd_quant_choices = ['off','q8','q4'] def sd_quant_option(value): - return int((value or '0')[0]) + try: + lvl = sd_quant_choices.index(value) + return lvl + except Exception: + return 0 def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl_filename,clipg_filename,photomaker_filename): global args @@ -1674,8 +1678,7 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl thds = sdt inputs.threads = thds - sd_quant_types = {0: -1, 1: 8, 2: 2} # enum sd_type_t - inputs.quant = sd_quant_types[args.sdquant] + inputs.quant = args.sdquant inputs.flash_attention = args.sdflashattention sdconvdirect = sd_convdirect_option(args.sdconvdirect) inputs.diffusion_conv_direct = sdconvdirect == 'full' @@ -4943,7 +4946,7 @@ def show_gui(): pass def changed_gpulayers_estimate(*args): - predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),sd_quant_option(sd_quant_var.get() or 0),int(blasbatchsize_values[int(blas_size_var.get())]),(quantkv_var.get() if flashattention_var.get()==1 else 0)) + predicted_gpu_layers = 
autoset_gpu_layers(int(contextsize_text[context_var.get()]),sd_quant_option(sd_quant_var.get()),int(blasbatchsize_values[int(blas_size_var.get())]),(quantkv_var.get() if flashattention_var.get()==1 else 0)) max_gpu_layers = (f"/{modelfile_extracted_meta[1][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[1] and modelfile_extracted_meta[1][0]!=0) else "") index = runopts_var.get() gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CUDA" or index == "Use hipBLAS (ROCm)") @@ -5345,7 +5348,7 @@ def show_gui(): makelabelentry(images_tab, "(Soft):", sd_clamped_soft_var, 4, 50, padx=290,singleline=True,tooltip="Square image size restriction, to protect the server against memory crashes.\nAllows width-height tradeoffs, eg. 640 allows 640x640 and 512x768\nLeave at 0 for the default value: 832 for SD1.5/SD2, 1024 otherwise.",labelpadx=250) makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.") sd_model_var.trace_add("write", gui_changed_modelfile) - makelabelcombobox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 10, width=50, labelpadx=65, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices) + makelabelcombobox(images_tab, "Compress Weights (Saves Memory): ", sd_quant_var, 10, width=60, padx=220, labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices) sd_quant_var.trace_add("write", changed_gpulayers_estimate) makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 20, width=280, singlecol=True, filetypes=[("*.safetensors 
*.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!") @@ -5840,7 +5843,7 @@ def show_gui(): sd_clamped_var.set(int(dict["sdclamped"]) if ("sdclamped" in dict and dict["sdclamped"]) else 0) sd_clamped_soft_var.set(int(dict["sdclampedsoft"]) if ("sdclampedsoft" in dict and dict["sdclampedsoft"]) else 0) sd_threads_var.set(str(dict["sdthreads"]) if ("sdthreads" in dict and dict["sdthreads"]) else str(default_threads)) - sd_quant_var.set(sd_quant_choices[(dict["sdquant"] if "sdquant" in dict else 0)]) + sd_quant_var.set(sd_quant_choices[(dict["sdquant"] if ("sdquant" in dict and dict["sdquant"]>=0 and dict["sdquant"]<len(sd_quant_choices)) else 0)]) diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ ... @@ sd_params->model_path = inputs.model_filename; sd_params->wtype = SD_TYPE_COUNT; - if (inputs.quant > 0 && inputs.quant < SD_TYPE_COUNT) { - sd_params->wtype = (sd_type_t) inputs.quant; - printf("\nDiffusion Model quantized to %s", sd_type_name(sd_params->wtype)); + if (inputs.quant > 0) { + sd_params->wtype = (inputs.quant==1?SD_TYPE_Q8_0:SD_TYPE_Q4_0); + printf("Diffusion Model quantized to %s\n", sd_type_name(sd_params->wtype)); } sd_params->n_threads = inputs.threads; //if -1 use physical cores sd_params->diffusion_flash_attn = inputs.flash_attention;