dropped support for lora base, as upstream no longer uses it. If provided, it will be silently ignored.

Concedo 2025-06-02 12:49:53 +08:00
parent 51dc1cf920
commit 8e1ebc55b5
4 changed files with 4 additions and 35 deletions
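Since `--lora` keeps `nargs='+'`, an old-style second path (the former lora base) is still accepted by the argument parser but never read. A minimal sketch of the resulting behavior, with hypothetical filenames:

```python
import argparse

# Mirrors the updated flag: nargs='+' still accepts extra values,
# but the loader only ever reads args.lora[0].
parser = argparse.ArgumentParser()
parser.add_argument("--lora", metavar='[lora_filename]', nargs='+')

args = parser.parse_args(["--lora", "adapter.gguf", "old_base.gguf"])
lora_filename = args.lora[0]  # "adapter.gguf" is the only value used
# args.lora[1:] (the former lora_base slot) is never read: silently ignored.
```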

View file

@@ -33,7 +33,6 @@ extern "C"
     {
         std::string model = inputs.model_filename;
         lora_filename = inputs.lora_filename;
-        lora_base = inputs.lora_base;
         mmproj_filename = inputs.mmproj_filename;
         draftmodel_filename = inputs.draftmodel_filename;

View file

@@ -39,7 +39,6 @@ struct load_model_inputs
     const char * executable_path = nullptr;
     const char * model_filename = nullptr;
     const char * lora_filename = nullptr;
-    const char * lora_base = nullptr;
     const char * draftmodel_filename = nullptr;
     const int draft_amount = 8;
     const int draft_gpulayers = 999;
@@ -273,7 +272,6 @@ struct embeddings_generation_outputs
 extern std::string executable_path;
 extern std::string lora_filename;
-extern std::string lora_base;
 extern std::string mmproj_filename;
 extern std::string draftmodel_filename;
 extern std::vector<std::string> generated_tokens;

View file

@@ -52,7 +52,6 @@ const int LLAVA_TOKEN_IDENTIFIER_B = -999;
 //shared
 std::string executable_path = "";
 std::string lora_filename = "";
-std::string lora_base = "";
 std::string mmproj_filename = "";
 std::string draftmodel_filename = "";
 int speculative_chunk_amt = 8; //do it in chunks of this many tokens
@@ -2058,15 +2057,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         {
             printf("\nAttempting to apply LORA adapter: %s\n", lora_filename.c_str());
-            const char * lora_base_arg = NULL;
-            if (lora_base != "") {
-                printf("Using LORA base model: %s\n", lora_base.c_str());
-                lora_base_arg = lora_base.c_str();
-            }
             int err = llama_v2_apply_lora_from_file(llama_ctx_v2,
                                                     lora_filename.c_str(),
-                                                    lora_base_arg,
+                                                    nullptr,
                                                     kcpp_data->n_threads);
             if (err != 0)
             {
@@ -2125,15 +2118,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         {
             printf("\nAttempting to apply LORA adapter: %s\n", lora_filename.c_str());
-            const char * lora_base_arg = NULL;
-            if (lora_base != "") {
-                printf("Using LORA base model: %s\n", lora_base.c_str());
-                lora_base_arg = lora_base.c_str();
-            }
             int err = llama_v3_apply_lora_from_file(llama_ctx_v3,
                                                     lora_filename.c_str(),
-                                                    lora_base_arg,
+                                                    nullptr,
                                                     kcpp_data->n_threads);
             if (err != 0)
             {
@@ -2382,13 +2369,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         if (lora_filename != "")
         {
             printf("\nAttempting to apply LORA adapter: %s\n", lora_filename.c_str());
-            const char * lora_base_arg = NULL;
-            if (lora_base != "") {
-                printf("Using LORA base model: %s\n", lora_base.c_str());
-                lora_base_arg = lora_base.c_str();
-            }
             auto adapter = llama_adapter_lora_init(llamamodel, lora_filename.c_str());
             if (adapter == nullptr) {
                 fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);

View file

@@ -161,7 +161,6 @@ class load_model_inputs(ctypes.Structure):
                 ("executable_path", ctypes.c_char_p),
                 ("model_filename", ctypes.c_char_p),
                 ("lora_filename", ctypes.c_char_p),
-                ("lora_base", ctypes.c_char_p),
                 ("draftmodel_filename", ctypes.c_char_p),
                 ("draft_amount", ctypes.c_int),
                 ("draft_gpulayers", ctypes.c_int),
@@ -1198,13 +1197,10 @@ def load_model(model_filename):
     inputs.use_mmap = args.usemmap
     inputs.use_mlock = args.usemlock
     inputs.lora_filename = "".encode("UTF-8")
-    inputs.lora_base = "".encode("UTF-8")
     inputs.lora_multiplier = args.loramult
     if args.lora:
         inputs.lora_filename = args.lora[0].encode("UTF-8")
         inputs.use_mmap = False
-        if len(args.lora) > 1:
-            inputs.lora_base = args.lora[1].encode("UTF-8")
     inputs.draftmodel_filename = args.draftmodel.encode("UTF-8") if args.draftmodel else "".encode("UTF-8")
     inputs.draft_amount = args.draftamount
@@ -4146,7 +4142,6 @@ def show_gui():
     model_var = ctk.StringVar()
     lora_var = ctk.StringVar()
-    lora_base_var = ctk.StringVar()
     loramult_var = ctk.StringVar(value="1.0")
     preloadstory_var = ctk.StringVar()
     savedatafile_var = ctk.StringVar()
@@ -4796,7 +4791,6 @@ def show_gui():
     ctk.CTkButton(model_tab, width=70, text = "HF Search", command = model_searcher ).grid(row=1,column=0, stick="nw", padx=370)
     makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=160,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
     makelabelentry(model_tab, "Multiplier: ", loramult_var, 3, 50,padx=390,singleline=True,tooltip="Scale multiplier for Text LoRA Strength. Default is 1.0", labelpadx=330)
-    makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5,width=280,singlerow=True,tooltiptxt="Select an optional F16 GGML Text LoRA base file to use.\nLeave blank to skip.")
     makefileentry(model_tab, "Vision mmproj:", "Select Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select a mmproj file to use for vision models like LLaVA.\nLeave blank to skip.")
     makecheckbox(model_tab, "Vision Force CPU", mmprojcpu_var, 9, tooltiptxt="Force CLIP for Vision mmproj always on CPU.")
     makelabelentry(model_tab, "Vision MaxRes:", visionmaxres_var, 9, padx=320, singleline=True, tooltip=f"Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default {default_visionmaxres}).", labelpadx=220)
@@ -5082,7 +5076,7 @@ def show_gui():
         pass
     args.model_param = None if model_var.get() == "" else model_var.get()
-    args.lora = None if lora_var.get() == "" else ([lora_var.get()] if lora_base_var.get()=="" else [lora_var.get(), lora_base_var.get()])
+    args.lora = None if lora_var.get() == "" else ([lora_var.get()])
     args.loramult = (float(loramult_var.get()) if loramult_var.get()!="" else 1.0)
     args.preloadstory = None if preloadstory_var.get() == "" else preloadstory_var.get()
     args.savedatafile = None if savedatafile_var.get() == "" else savedatafile_var.get()
@@ -5281,11 +5275,9 @@ def show_gui():
     model_var.set(dict["model_param"] if ("model_param" in dict and dict["model_param"]) else "")
     lora_var.set("")
-    lora_base_var.set("")
     if "lora" in dict and dict["lora"]:
         if len(dict["lora"]) > 1:
             lora_var.set(dict["lora"][0])
-            lora_base_var.set(dict["lora"][1])
         else:
             lora_var.set(dict["lora"][0])
     loramult_var.set(str(dict["loramult"]) if ("loramult" in dict and dict["loramult"]) else "1.0")
@@ -6994,7 +6986,7 @@ if __name__ == '__main__':
     advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
     advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,16,32,64,128,256,512,1024,2048], default=512)
     advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
-    advparser.add_argument("--lora", help="GGUF models only, applies a lora file on top of model.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
+    advparser.add_argument("--lora", help="GGUF models only, applies a lora file on top of model.", metavar=('[lora_filename]'), nargs='+')
     advparser.add_argument("--loramult", metavar=('[amount]'), help="Multiplier for the Text LORA model to be applied.", type=float, default=1.0)
     advparser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true')
     advparser.add_argument("--nofastforward", help="If set, do not attempt to fast forward GGUF context (always reprocess). Will also enable noshift", action='store_true')