mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Fixed LoRA; it now works with quantized models too
This commit is contained in:
parent
ad2522b319
commit
fe12b1cbd4
3 changed files with 12 additions and 26 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -22,6 +22,8 @@ ggml/src/ggml-vulkan-shaders-noext.cpp
|
||||||
ggml/src/ggml-vulkan-shaders-noext.hpp
|
ggml/src/ggml-vulkan-shaders-noext.hpp
|
||||||
vulkan-shaders-gen-noext.exe
|
vulkan-shaders-gen-noext.exe
|
||||||
vulkan-shaders-gen-noext
|
vulkan-shaders-gen-noext
|
||||||
|
vulkan-spv-tmp/*
|
||||||
|
vulkan-spv-noext-tmp/*
|
||||||
|
|
||||||
lcov-report/
|
lcov-report/
|
||||||
gcovr-report/
|
gcovr-report/
|
||||||
|
|
34
koboldcpp.py
34
koboldcpp.py
|
@ -3690,7 +3690,6 @@ def show_gui():
|
||||||
toggleflashattn(1,1,1)
|
toggleflashattn(1,1,1)
|
||||||
togglectxshift(1,1,1)
|
togglectxshift(1,1,1)
|
||||||
togglehorde(1,1,1)
|
togglehorde(1,1,1)
|
||||||
togglesdquant(1,1,1)
|
|
||||||
toggletaesd(1,1,1)
|
toggletaesd(1,1,1)
|
||||||
tabbuttonaction(tabnames[curr_tab_idx])
|
tabbuttonaction(tabnames[curr_tab_idx])
|
||||||
|
|
||||||
|
@ -4400,23 +4399,10 @@ def show_gui():
|
||||||
makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 6, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.")
|
makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 6, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.")
|
||||||
sd_model_var.trace("w", gui_changed_modelfile)
|
sd_model_var.trace("w", gui_changed_modelfile)
|
||||||
|
|
||||||
sdloritem1,sdloritem2,sdloritem3 = makefileentry(images_tab, "Image LoRA (Must be non-quant):", "Select SD lora file",sd_lora_var, 10, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded.")
|
makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 10, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
|
||||||
sdloritem4,sdloritem5 = makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 12, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.")
|
makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 12, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.")
|
||||||
def togglesdquant(a,b,c):
|
|
||||||
if sd_quant_var.get()==1:
|
makecheckbox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 8,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.")
|
||||||
sdloritem1.grid_remove()
|
|
||||||
sdloritem2.grid_remove()
|
|
||||||
sdloritem3.grid_remove()
|
|
||||||
sdloritem4.grid_remove()
|
|
||||||
sdloritem5.grid_remove()
|
|
||||||
else:
|
|
||||||
if not sdloritem1.grid_info() or not sdloritem2.grid_info() or not sdloritem3.grid_info() or not sdloritem4.grid_info() or not sdloritem5.grid_info():
|
|
||||||
sdloritem1.grid()
|
|
||||||
sdloritem2.grid()
|
|
||||||
sdloritem3.grid()
|
|
||||||
sdloritem4.grid()
|
|
||||||
sdloritem5.grid()
|
|
||||||
makecheckbox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 8,command=togglesdquant,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.")
|
|
||||||
sd_quant_var.trace("w", changed_gpulayers_estimate)
|
sd_quant_var.trace("w", changed_gpulayers_estimate)
|
||||||
|
|
||||||
makefileentry(images_tab, "T5-XXL File:", "Select Optional T5-XXL model file (SD3 or flux)",sd_t5xxl_var, 14, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
|
makefileentry(images_tab, "T5-XXL File:", "Select Optional T5-XXL model file (SD3 or flux)",sd_t5xxl_var, 14, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
|
||||||
|
@ -4665,13 +4651,11 @@ def show_gui():
|
||||||
args.sdclipg = sd_clipg_var.get()
|
args.sdclipg = sd_clipg_var.get()
|
||||||
if sd_quant_var.get()==1:
|
if sd_quant_var.get()==1:
|
||||||
args.sdquant = True
|
args.sdquant = True
|
||||||
args.sdlora = ""
|
if sd_lora_var.get() != "":
|
||||||
|
args.sdlora = sd_lora_var.get()
|
||||||
|
args.sdloramult = float(sd_loramult_var.get())
|
||||||
else:
|
else:
|
||||||
if sd_lora_var.get() != "":
|
args.sdlora = ""
|
||||||
args.sdlora = sd_lora_var.get()
|
|
||||||
args.sdloramult = float(sd_loramult_var.get())
|
|
||||||
else:
|
|
||||||
args.sdlora = ""
|
|
||||||
|
|
||||||
if whisper_model_var.get() != "":
|
if whisper_model_var.get() != "":
|
||||||
args.whispermodel = whisper_model_var.get()
|
args.whispermodel = whisper_model_var.get()
|
||||||
|
@ -6531,7 +6515,7 @@ if __name__ == '__main__':
|
||||||
sdparsergroupvae.add_argument("--sdvaeauto", help="Uses a built-in VAE via TAE SD, which is very fast, and fixed bad VAEs.", action='store_true')
|
sdparsergroupvae.add_argument("--sdvaeauto", help="Uses a built-in VAE via TAE SD, which is very fast, and fixed bad VAEs.", action='store_true')
|
||||||
sdparsergrouplora = sdparsergroup.add_mutually_exclusive_group()
|
sdparsergrouplora = sdparsergroup.add_mutually_exclusive_group()
|
||||||
sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true')
|
sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true')
|
||||||
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify a stable diffusion LORA safetensors model to be applied. Cannot be used with quant models.", default="")
|
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify a stable diffusion LORA safetensors model to be applied.", default="")
|
||||||
sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the LORA model to be applied.", type=float, default=1.0)
|
sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the LORA model to be applied.", type=float, default=1.0)
|
||||||
sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')
|
sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
#include "ggml_extend.hpp"
|
#include "ggml_extend.hpp"
|
||||||
|
|
||||||
#define LORA_GRAPH_SIZE 10240
|
#define LORA_GRAPH_SIZE 20480
|
||||||
|
|
||||||
struct LoraModel : public GGMLRunner {
|
struct LoraModel : public GGMLRunner {
|
||||||
enum lora_t {
|
enum lora_t {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue