mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Minor adjustments for sdquant: let the backend translate the quantization level into the weight type more defensively, and adjust the UI dropdown for clarity.
This commit is contained in:
parent
2f8b0ec538
commit
80dabbb689
2 changed files with 13 additions and 10 deletions
17
koboldcpp.py
17
koboldcpp.py
|
@ -1657,10 +1657,14 @@ def sd_convdirect_option(value):
|
|||
return 'full'
|
||||
raise argparse.ArgumentTypeError(f"Invalid sdconvdirect option \"{value}\". Must be one of {sd_convdirect_choices}.")
|
||||
|
||||
sd_quant_choices = ['0 (off)', '1 (q8_0)', '2 (q4_0)']
|
||||
sd_quant_choices = ['off','q8','q4']
|
||||
|
||||
def sd_quant_option(value):
|
||||
return int((value or '0')[0])
|
||||
try:
|
||||
lvl = sd_quant_choices.index(value)
|
||||
return lvl
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl_filename,clipg_filename,photomaker_filename):
|
||||
global args
|
||||
|
@ -1674,8 +1678,7 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl
|
|||
thds = sdt
|
||||
|
||||
inputs.threads = thds
|
||||
sd_quant_types = {0: -1, 1: 8, 2: 2} # enum sd_type_t
|
||||
inputs.quant = sd_quant_types[args.sdquant]
|
||||
inputs.quant = args.sdquant
|
||||
inputs.flash_attention = args.sdflashattention
|
||||
sdconvdirect = sd_convdirect_option(args.sdconvdirect)
|
||||
inputs.diffusion_conv_direct = sdconvdirect == 'full'
|
||||
|
@ -4943,7 +4946,7 @@ def show_gui():
|
|||
pass
|
||||
|
||||
def changed_gpulayers_estimate(*args):
|
||||
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),sd_quant_option(sd_quant_var.get() or 0),int(blasbatchsize_values[int(blas_size_var.get())]),(quantkv_var.get() if flashattention_var.get()==1 else 0))
|
||||
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),sd_quant_option(sd_quant_var.get()),int(blasbatchsize_values[int(blas_size_var.get())]),(quantkv_var.get() if flashattention_var.get()==1 else 0))
|
||||
max_gpu_layers = (f"/{modelfile_extracted_meta[1][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[1] and modelfile_extracted_meta[1][0]!=0) else "")
|
||||
index = runopts_var.get()
|
||||
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CUDA" or index == "Use hipBLAS (ROCm)")
|
||||
|
@ -5345,7 +5348,7 @@ def show_gui():
|
|||
makelabelentry(images_tab, "(Soft):", sd_clamped_soft_var, 4, 50, padx=290,singleline=True,tooltip="Square image size restriction, to protect the server against memory crashes.\nAllows width-height tradeoffs, eg. 640 allows 640x640 and 512x768\nLeave at 0 for the default value: 832 for SD1.5/SD2, 1024 otherwise.",labelpadx=250)
|
||||
makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.")
|
||||
sd_model_var.trace_add("write", gui_changed_modelfile)
|
||||
makelabelcombobox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 10, width=50, labelpadx=65, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices)
|
||||
makelabelcombobox(images_tab, "Compress Weights (Saves Memory): ", sd_quant_var, 10, width=60, padx=220, labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices)
|
||||
sd_quant_var.trace_add("write", changed_gpulayers_estimate)
|
||||
|
||||
makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 20, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
|
||||
|
@ -5840,7 +5843,7 @@ def show_gui():
|
|||
sd_clamped_var.set(int(dict["sdclamped"]) if ("sdclamped" in dict and dict["sdclamped"]) else 0)
|
||||
sd_clamped_soft_var.set(int(dict["sdclampedsoft"]) if ("sdclampedsoft" in dict and dict["sdclampedsoft"]) else 0)
|
||||
sd_threads_var.set(str(dict["sdthreads"]) if ("sdthreads" in dict and dict["sdthreads"]) else str(default_threads))
|
||||
sd_quant_var.set(sd_quant_choices[(dict["sdquant"] if "sdquant" in dict else 0)])
|
||||
sd_quant_var.set(sd_quant_choices[(dict["sdquant"] if ("sdquant" in dict and dict["sdquant"]>=0 and dict["sdquant"]<len(sd_quant_choices)) else 0)])
|
||||
sd_flash_attention_var.set(1 if ("sdflashattention" in dict and dict["sdflashattention"]) else 0)
|
||||
sd_convdirect_var.set(sd_convdirect_option(dict.get("sdconvdirect")))
|
||||
sd_vae_var.set(dict["sdvae"] if ("sdvae" in dict and dict["sdvae"]) else "")
|
||||
|
|
|
@ -254,9 +254,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
sd_params = new SDParams();
|
||||
sd_params->model_path = inputs.model_filename;
|
||||
sd_params->wtype = SD_TYPE_COUNT;
|
||||
if (inputs.quant > 0 && inputs.quant < SD_TYPE_COUNT) {
|
||||
sd_params->wtype = (sd_type_t) inputs.quant;
|
||||
printf("\nDiffusion Model quantized to %s", sd_type_name(sd_params->wtype));
|
||||
if (inputs.quant > 0) {
|
||||
sd_params->wtype = (inputs.quant==1?SD_TYPE_Q8_0:SD_TYPE_Q4_0);
|
||||
printf("Diffusion Model quantized to %s\n", sd_type_name(sd_params->wtype));
|
||||
}
|
||||
sd_params->n_threads = inputs.threads; //if -1 use physical cores
|
||||
sd_params->diffusion_flash_attn = inputs.flash_attention;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue