mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 17:14:36 +00:00)
added scale for text lora
parent 74ef097c4a
commit 51dc1cf920
3 changed files with 12 additions and 4 deletions
expose.h (1 change)
@@ -71,6 +71,7 @@ struct load_model_inputs
     const int quant_v = 0;
     const bool check_slowness = false;
     const bool swa_support = false;
+    const float lora_multiplier = 1.0f;
     const bool quiet = false;
     const int debugmode = 0;
 };
gpttype_adapter.cpp (2 changes)

@@ -2394,7 +2394,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
             return ModelLoadResult::FAIL;
         }
-        llama_set_adapter_lora(llama_ctx_v4, adapter, 1.0f);
+        llama_set_adapter_lora(llama_ctx_v4, adapter, inputs.lora_multiplier);
     }

     if(mmproj_filename != "" && file_format==FileFormat::GGUF_GENERIC)
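The third argument of llama_set_adapter_lora is the adapter scale, so replacing the hard-coded 1.0f with inputs.lora_multiplier lets the user pick the strength: 0.0 effectively disables the adapter, 1.0 applies it fully. A minimal numpy sketch of the underlying idea (a conceptual illustration of linear LoRA scaling, not koboldcpp's actual kernel):

# Conceptual sketch: the multiplier linearly scales the low-rank
# delta added to each base weight matrix, so 0.0 reproduces the
# base model and 1.0 applies the adapter at full strength.
import numpy as np

rng = np.random.default_rng(0)
W = rng.standard_normal((8, 8))   # base weight (hypothetical shapes)
A = rng.standard_normal((8, 2))   # LoRA down-projection
B = rng.standard_normal((2, 8))   # LoRA up-projection

def effective_weight(scale):
    return W + scale * (A @ B)    # W' = W + scale * (A @ B)

assert np.allclose(effective_weight(0.0), W)      # adapter off
half = effective_weight(0.5)
full = effective_weight(1.0)
assert np.allclose(half - W, 0.5 * (full - W))    # linear in the scale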
koboldcpp.py (13 changes)
@@ -193,6 +193,7 @@ class load_model_inputs(ctypes.Structure):
                 ("quant_v", ctypes.c_int),
                 ("check_slowness", ctypes.c_bool),
                 ("swa_support", ctypes.c_bool),
+                ("lora_multiplier", ctypes.c_float),
                 ("quiet", ctypes.c_bool),
                 ("debugmode", ctypes.c_int)]
 
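ctypes lays out _fields_ in declaration order, so this new entry must sit in the same slot as the float lora_multiplier member added to load_model_inputs in expose.h, or every field after it is read from the wrong offset. A minimal sketch with a hypothetical trimmed-down struct:

# Sketch: a ctypes Structure mirrors a C struct by declaration order,
# so the Python field list must match expose.h exactly.
# (mini_inputs is a hypothetical cut-down stand-in, not the real struct.)
import ctypes

class mini_inputs(ctypes.Structure):
    _fields_ = [("swa_support", ctypes.c_bool),
                ("lora_multiplier", ctypes.c_float),  # new field, same position as in C
                ("quiet", ctypes.c_bool)]

inp = mini_inputs()
inp.lora_multiplier = 1.0
print(ctypes.sizeof(mini_inputs), inp.lora_multiplier)  # size includes C-style padding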
@@ -1198,6 +1199,7 @@ def load_model(model_filename):
     inputs.use_mlock = args.usemlock
     inputs.lora_filename = "".encode("UTF-8")
     inputs.lora_base = "".encode("UTF-8")
+    inputs.lora_multiplier = args.loramult
     if args.lora:
         inputs.lora_filename = args.lora[0].encode("UTF-8")
         inputs.use_mmap = False
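Note that assigning args.loramult here narrows Python's 64-bit float to a 32-bit c_float, so the 1.0 default crosses the boundary exactly while other values round to float32 precision. A small sketch (hypothetical one-field struct):

# Sketch: ctypes narrows Python floats to c_float (float32) on assignment.
import ctypes

class box(ctypes.Structure):
    _fields_ = [("lora_multiplier", ctypes.c_float)]

b = box()
b.lora_multiplier = 1.0
print(b.lora_multiplier)  # 1.0 exactly (representable in float32)
b.lora_multiplier = 0.3
print(b.lora_multiplier)  # ~0.30000001192092896 after float32 rounding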
@@ -4145,6 +4147,7 @@ def show_gui():
     model_var = ctk.StringVar()
     lora_var = ctk.StringVar()
     lora_base_var = ctk.StringVar()
+    loramult_var = ctk.StringVar(value="1.0")
     preloadstory_var = ctk.StringVar()
     savedatafile_var = ctk.StringVar()
     mmproj_var = ctk.StringVar()
@@ -4791,7 +4794,8 @@ def show_gui():
 
     makefileentry(model_tab, "Text Model:", "Select GGUF or GGML Model File", model_var, 1,width=205,singlerow=True, onchoosefile=on_picked_model_file,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
     ctk.CTkButton(model_tab, width=70, text = "HF Search", command = model_searcher ).grid(row=1,column=0, stick="nw", padx=370)
-    makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=280,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
+    makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=160,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
+    makelabelentry(model_tab, "Multiplier: ", loramult_var, 3, 50,padx=390,singleline=True,tooltip="Scale multiplier for Text LoRA Strength. Default is 1.0", labelpadx=330)
     makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5,width=280,singlerow=True,tooltiptxt="Select an optional F16 GGML Text LoRA base file to use.\nLeave blank to skip.")
     makefileentry(model_tab, "Vision mmproj:", "Select Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select a mmproj file to use for vision models like LLaVA.\nLeave blank to skip.")
     makecheckbox(model_tab, "Vision Force CPU", mmprojcpu_var, 9, tooltiptxt="Force CLIP for Vision mmproj always on CPU.")
@@ -5079,6 +5083,7 @@ def show_gui():
 
     args.model_param = None if model_var.get() == "" else model_var.get()
     args.lora = None if lora_var.get() == "" else ([lora_var.get()] if lora_base_var.get()=="" else [lora_var.get(), lora_base_var.get()])
+    args.loramult = (float(loramult_var.get()) if loramult_var.get()!="" else 1.0)
     args.preloadstory = None if preloadstory_var.get() == "" else preloadstory_var.get()
     args.savedatafile = None if savedatafile_var.get() == "" else savedatafile_var.get()
     try:
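The export converts the entry box with float() and only guards against the empty string, so a non-numeric entry raises ValueError at save time. A defensive variant (an assumption for illustration, not what the patch does) would swallow that too:

# Sketch: the patch's guard plus a fallback for non-numeric input
# (the except branch is hypothetical; the patch itself would raise).
def parse_loramult(text):
    if text == "":
        return 1.0
    try:
        return float(text)
    except ValueError:
        return 1.0

assert parse_loramult("") == 1.0
assert parse_loramult("0.5") == 0.5
assert parse_loramult("abc") == 1.0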
@@ -5283,6 +5288,7 @@ def show_gui():
                 lora_base_var.set(dict["lora"][1])
             else:
                 lora_var.set(dict["lora"][0])
+        loramult_var.set(str(dict["loramult"]) if ("loramult" in dict and dict["loramult"]) else "1.0")
 
         mmproj_var.set(dict["mmproj"] if ("mmproj" in dict and dict["mmproj"]) else "")
         mmprojcpu_var.set(1 if ("mmprojcpu" in dict and dict["mmprojcpu"]) else 0)
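The reload guard is a truthiness test, so a saved multiplier of 0.0 is falsy and silently snaps back to "1.0" on import, while values like 0.5 round-trip. A sketch of that behavior:

# Sketch of the config-reload guard shown above: 0.0 is falsy,
# so it reloads as the "1.0" default rather than as "0.0".
def reload_loramult(cfg):
    return str(cfg["loramult"]) if ("loramult" in cfg and cfg["loramult"]) else "1.0"

print(reload_loramult({"loramult": 0.5}))  # "0.5"
print(reload_loramult({}))                 # "1.0" (key absent)
print(reload_loramult({"loramult": 0.0}))  # "1.0" (0.0 is falsy and is dropped)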
@@ -6988,7 +6994,8 @@ if __name__ == '__main__':
     advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
     advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,16,32,64,128,256,512,1024,2048], default=512)
     advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
-    advparser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
+    advparser.add_argument("--lora", help="GGUF models only, applies a lora file on top of model.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
+    advparser.add_argument("--loramult", metavar=('[amount]'), help="Multiplier for the Text LORA model to be applied.", type=float, default=1.0)
     advparser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true')
     advparser.add_argument("--nofastforward", help="If set, do not attempt to fast forward GGUF context (always reprocess). Will also enable noshift", action='store_true')
     advparser.add_argument("--useswa", help="If set, allows Sliding Window Attention (SWA) KV Cache, which saves memory but cannot be used with context shifting.", action='store_true')
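The new flag rides alongside the existing --lora argument. A trimmed, self-contained parser (only these two flags; the real parser defines many more) shows how it is consumed:

# Sketch: how --loramult parses from the command line.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--lora", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
parser.add_argument("--loramult", metavar=('[amount]'), type=float, default=1.0)

args = parser.parse_args(["--lora", "adapter.gguf", "--loramult", "0.5"])
print(args.lora, args.loramult)  # ['adapter.gguf'] 0.5

args = parser.parse_args([])
print(args.loramult)             # 1.0 (default when the flag is omitted)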
@@ -7064,7 +7071,7 @@
     sdparsergrouplora = sdparsergroup.add_mutually_exclusive_group()
     sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true')
     sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify an image generation LORA safetensors model to be applied.", default="")
-    sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the LORA model to be applied.", type=float, default=1.0)
+    sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LORA model to be applied.", type=float, default=1.0)
     sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')
 
     whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')