From fe12b1cbd4f0187673e25b6c27b5879c7d23c314 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Mon, 14 Apr 2025 23:44:42 +0800
Subject: [PATCH] fixed lora, now works quanted too

---
 .gitignore               |  2 ++
 koboldcpp.py             | 34 +++++++++-------------------------
 otherarch/sdcpp/lora.hpp |  2 +-
 3 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/.gitignore b/.gitignore
index 84b17fc10..6477772de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,8 @@ ggml/src/ggml-vulkan-shaders-noext.cpp
 ggml/src/ggml-vulkan-shaders-noext.hpp
 vulkan-shaders-gen-noext.exe
 vulkan-shaders-gen-noext
+vulkan-spv-tmp/*
+vulkan-spv-noext-tmp/*
 
 lcov-report/
 gcovr-report/
diff --git a/koboldcpp.py b/koboldcpp.py
index 921b790a3..a84c64ac7 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -3690,7 +3690,6 @@ def show_gui():
         toggleflashattn(1,1,1)
         togglectxshift(1,1,1)
         togglehorde(1,1,1)
-        togglesdquant(1,1,1)
         toggletaesd(1,1,1)
         tabbuttonaction(tabnames[curr_tab_idx])
 
@@ -4400,23 +4399,10 @@ def show_gui():
     makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 6, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.")
     sd_model_var.trace("w", gui_changed_modelfile)
 
-    sdloritem1,sdloritem2,sdloritem3 = makefileentry(images_tab, "Image LoRA (Must be non-quant):", "Select SD lora file",sd_lora_var, 10, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded.")
-    sdloritem4,sdloritem5 = makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 12, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.")
-    def togglesdquant(a,b,c):
-        if sd_quant_var.get()==1:
-            sdloritem1.grid_remove()
-            sdloritem2.grid_remove()
-            sdloritem3.grid_remove()
-            sdloritem4.grid_remove()
-            sdloritem5.grid_remove()
-        else:
-            if not sdloritem1.grid_info() or not sdloritem2.grid_info() or not sdloritem3.grid_info() or not sdloritem4.grid_info() or not sdloritem5.grid_info():
-                sdloritem1.grid()
-                sdloritem2.grid()
-                sdloritem3.grid()
-                sdloritem4.grid()
-                sdloritem5.grid()
-    makecheckbox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 8,command=togglesdquant,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.")
+    makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 10, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
+    makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 12, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.")
+
+    makecheckbox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 8,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.")
     sd_quant_var.trace("w", changed_gpulayers_estimate)
 
     makefileentry(images_tab, "T5-XXL File:", "Select Optional T5-XXL model file (SD3 or flux)",sd_t5xxl_var, 14, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
@@ -4665,13 +4651,11 @@ def show_gui():
             args.sdclipg = sd_clipg_var.get()
         if sd_quant_var.get()==1:
             args.sdquant = True
-            args.sdlora = ""
+        if sd_lora_var.get() != "":
+            args.sdlora = sd_lora_var.get()
+            args.sdloramult = float(sd_loramult_var.get())
         else:
-            if sd_lora_var.get() != "":
-                args.sdlora = sd_lora_var.get()
-                args.sdloramult = float(sd_loramult_var.get())
-            else:
-                args.sdlora = ""
+            args.sdlora = ""
 
         if whisper_model_var.get() != "":
             args.whispermodel = whisper_model_var.get()
@@ -6531,7 +6515,7 @@ if __name__ == '__main__':
     sdparsergroupvae.add_argument("--sdvaeauto", help="Uses a built-in VAE via TAE SD, which is very fast, and fixed bad VAEs.", action='store_true')
     sdparsergrouplora = sdparsergroup.add_mutually_exclusive_group()
     sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true')
-    sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify a stable diffusion LORA safetensors model to be applied. Cannot be used with quant models.", default="")
+    sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify a stable diffusion LORA safetensors model to be applied.", default="")
     sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the LORA model to be applied.", type=float, default=1.0)
     sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')
 
diff --git a/otherarch/sdcpp/lora.hpp b/otherarch/sdcpp/lora.hpp
index a462b36b4..f7f46ea48 100644
--- a/otherarch/sdcpp/lora.hpp
+++ b/otherarch/sdcpp/lora.hpp
@@ -3,7 +3,7 @@
 
 #include "ggml_extend.hpp"
 
-#define LORA_GRAPH_SIZE 10240
+#define LORA_GRAPH_SIZE 20480
 
 struct LoraModel : public GGMLRunner {
     enum lora_t {