mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
Allow mmproj to be run on CPU
This commit is contained in:
parent
f968079290
commit
9cd6a1add2
3 changed files with 14 additions and 1 deletions
1
expose.h
1
expose.h
|
@@ -45,6 +45,7 @@ struct load_model_inputs
|
||||||
const int draft_gpulayers = 999;
|
const int draft_gpulayers = 999;
|
||||||
const float draft_gpusplit[tensor_split_max] = {};
|
const float draft_gpusplit[tensor_split_max] = {};
|
||||||
const char * mmproj_filename = nullptr;
|
const char * mmproj_filename = nullptr;
|
||||||
|
const bool mmproj_cpu = false;
|
||||||
const int visionmaxres = 2048;
|
const int visionmaxres = 2048;
|
||||||
const bool use_mmap = false;
|
const bool use_mmap = false;
|
||||||
const bool use_mlock = false;
|
const bool use_mlock = false;
|
||||||
|
|
|
@@ -2320,6 +2320,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
printf("Clip will use CPU for this model!\n");
|
printf("Clip will use CPU for this model!\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
if(inputs.mmproj_cpu)
|
||||||
|
{
|
||||||
|
set_clip_uses_gpu(false);
|
||||||
|
printf("Clip forced to use CPU!\n");
|
||||||
|
}
|
||||||
clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1);
|
clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1);
|
||||||
if(clp_ctx == nullptr) {
|
if(clp_ctx == nullptr) {
|
||||||
fprintf(stderr, "%s: error: failed to load mmproj model!\n", __func__);
|
fprintf(stderr, "%s: error: failed to load mmproj model!\n", __func__);
|
||||||
|
|
|
@@ -165,6 +165,7 @@ class load_model_inputs(ctypes.Structure):
|
||||||
("draft_gpulayers", ctypes.c_int),
|
("draft_gpulayers", ctypes.c_int),
|
||||||
("draft_gpusplit", ctypes.c_float * tensor_split_max),
|
("draft_gpusplit", ctypes.c_float * tensor_split_max),
|
||||||
("mmproj_filename", ctypes.c_char_p),
|
("mmproj_filename", ctypes.c_char_p),
|
||||||
|
("mmproj_cpu", ctypes.c_bool),
|
||||||
("visionmaxres", ctypes.c_int),
|
("visionmaxres", ctypes.c_int),
|
||||||
("use_mmap", ctypes.c_bool),
|
("use_mmap", ctypes.c_bool),
|
||||||
("use_mlock", ctypes.c_bool),
|
("use_mlock", ctypes.c_bool),
|
||||||
|
@@ -1184,6 +1185,7 @@ def load_model(model_filename):
|
||||||
else:
|
else:
|
||||||
inputs.draft_gpusplit[n] = 0
|
inputs.draft_gpusplit[n] = 0
|
||||||
inputs.mmproj_filename = args.mmproj.encode("UTF-8") if args.mmproj else "".encode("UTF-8")
|
inputs.mmproj_filename = args.mmproj.encode("UTF-8") if args.mmproj else "".encode("UTF-8")
|
||||||
|
inputs.mmproj_cpu = (True if args.mmprojcpu else False)
|
||||||
inputs.visionmaxres = (512 if args.visionmaxres < 512 else (2048 if args.visionmaxres > 2048 else args.visionmaxres))
|
inputs.visionmaxres = (512 if args.visionmaxres < 512 else (2048 if args.visionmaxres > 2048 else args.visionmaxres))
|
||||||
inputs.use_smartcontext = args.smartcontext
|
inputs.use_smartcontext = args.smartcontext
|
||||||
inputs.use_contextshift = (0 if args.noshift else 1)
|
inputs.use_contextshift = (0 if args.noshift else 1)
|
||||||
|
@@ -3879,6 +3881,7 @@ def show_gui():
|
||||||
preloadstory_var = ctk.StringVar()
|
preloadstory_var = ctk.StringVar()
|
||||||
savedatafile_var = ctk.StringVar()
|
savedatafile_var = ctk.StringVar()
|
||||||
mmproj_var = ctk.StringVar()
|
mmproj_var = ctk.StringVar()
|
||||||
|
mmprojcpu_var = ctk.IntVar(value=0)
|
||||||
visionmaxres_var = ctk.StringVar(value=str(default_visionmaxres))
|
visionmaxres_var = ctk.StringVar(value=str(default_visionmaxres))
|
||||||
draftmodel_var = ctk.StringVar()
|
draftmodel_var = ctk.StringVar()
|
||||||
draftamount_var = ctk.StringVar(value=str(default_draft_amount))
|
draftamount_var = ctk.StringVar(value=str(default_draft_amount))
|
||||||
|
@@ -4408,7 +4411,8 @@ def show_gui():
|
||||||
makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=280,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
|
makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=280,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
|
||||||
makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5,width=280,singlerow=True,tooltiptxt="Select an optional F16 GGML Text LoRA base file to use.\nLeave blank to skip.")
|
makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5,width=280,singlerow=True,tooltiptxt="Select an optional F16 GGML Text LoRA base file to use.\nLeave blank to skip.")
|
||||||
makefileentry(model_tab, "Vision mmproj:", "Select Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select a mmproj file to use for vision models like LLaVA.\nLeave blank to skip.")
|
makefileentry(model_tab, "Vision mmproj:", "Select Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select a mmproj file to use for vision models like LLaVA.\nLeave blank to skip.")
|
||||||
makelabelentry(model_tab, "Vision MaxRes:", visionmaxres_var, 9, padx=100, singleline=True, tooltip=f"Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default {default_visionmaxres}).")
|
makecheckbox(model_tab, "Vision Force CPU", mmprojcpu_var, 9, tooltiptxt="Force CLIP for Vision mmproj always on CPU.")
|
||||||
|
makelabelentry(model_tab, "Vision MaxRes:", visionmaxres_var, 9, padx=320, singleline=True, tooltip=f"Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default {default_visionmaxres}).", labelpadx=220)
|
||||||
makefileentry(model_tab, "Draft Model:", "Select Speculative Text Model File", draftmodel_var, 11,width=280,singlerow=True,tooltiptxt="Select a draft text model file to use for speculative decoding.\nLeave blank to skip.")
|
makefileentry(model_tab, "Draft Model:", "Select Speculative Text Model File", draftmodel_var, 11,width=280,singlerow=True,tooltiptxt="Select a draft text model file to use for speculative decoding.\nLeave blank to skip.")
|
||||||
makelabelentry(model_tab, "Draft Amount: ", draftamount_var, 13, 50,padx=100,singleline=True,tooltip="How many tokens to draft per chunk before verifying results")
|
makelabelentry(model_tab, "Draft Amount: ", draftamount_var, 13, 50,padx=100,singleline=True,tooltip="How many tokens to draft per chunk before verifying results")
|
||||||
makelabelentry(model_tab, "Splits: ", draftgpusplit_str_vars, 13, 50,padx=210,singleline=True,tooltip="Distribution of draft model layers. Leave blank to follow main model's gpu split. Only works if multi-gpu (All) selected in main model.", labelpadx=160)
|
makelabelentry(model_tab, "Splits: ", draftgpusplit_str_vars, 13, 50,padx=210,singleline=True,tooltip="Distribution of draft model layers. Leave blank to follow main model's gpu split. Only works if multi-gpu (All) selected in main model.", labelpadx=160)
|
||||||
|
@@ -4694,6 +4698,7 @@ def show_gui():
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
args.mmproj = None if mmproj_var.get() == "" else mmproj_var.get()
|
args.mmproj = None if mmproj_var.get() == "" else mmproj_var.get()
|
||||||
|
args.mmprojcpu = (mmprojcpu_var.get()==1)
|
||||||
args.visionmaxres = int(visionmaxres_var.get()) if visionmaxres_var.get()!="" else default_visionmaxres
|
args.visionmaxres = int(visionmaxres_var.get()) if visionmaxres_var.get()!="" else default_visionmaxres
|
||||||
args.draftmodel = None if draftmodel_var.get() == "" else draftmodel_var.get()
|
args.draftmodel = None if draftmodel_var.get() == "" else draftmodel_var.get()
|
||||||
args.draftamount = int(draftamount_var.get()) if draftamount_var.get()!="" else default_draft_amount
|
args.draftamount = int(draftamount_var.get()) if draftamount_var.get()!="" else default_draft_amount
|
||||||
|
@@ -4886,6 +4891,7 @@ def show_gui():
|
||||||
lora_var.set(dict["lora"][0])
|
lora_var.set(dict["lora"][0])
|
||||||
|
|
||||||
mmproj_var.set(dict["mmproj"] if ("mmproj" in dict and dict["mmproj"]) else "")
|
mmproj_var.set(dict["mmproj"] if ("mmproj" in dict and dict["mmproj"]) else "")
|
||||||
|
mmprojcpu_var.set(1 if ("mmprojcpu" in dict and dict["mmprojcpu"]) else 0)
|
||||||
if "visionmaxres" in dict and dict["visionmaxres"]:
|
if "visionmaxres" in dict and dict["visionmaxres"]:
|
||||||
visionmaxres_var.set(dict["visionmaxres"])
|
visionmaxres_var.set(dict["visionmaxres"])
|
||||||
draftmodel_var.set(dict["draftmodel"] if ("draftmodel" in dict and dict["draftmodel"]) else "")
|
draftmodel_var.set(dict["draftmodel"] if ("draftmodel" in dict and dict["draftmodel"]) else "")
|
||||||
|
@@ -6575,6 +6581,7 @@ if __name__ == '__main__':
|
||||||
advparser.add_argument("--ssl", help="Allows all content to be served over SSL instead. A valid UNENCRYPTED SSL cert and key .pem files must be provided", metavar=('[cert_pem]', '[key_pem]'), nargs='+')
|
advparser.add_argument("--ssl", help="Allows all content to be served over SSL instead. A valid UNENCRYPTED SSL cert and key .pem files must be provided", metavar=('[cert_pem]', '[key_pem]'), nargs='+')
|
||||||
advparser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
|
advparser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
|
||||||
advparser.add_argument("--mmproj", metavar=('[filename]'), help="Select a multimodal projector file for vision models like LLaVA.", default="")
|
advparser.add_argument("--mmproj", metavar=('[filename]'), help="Select a multimodal projector file for vision models like LLaVA.", default="")
|
||||||
|
advparser.add_argument("--mmprojcpu", help="Force CLIP for Vision mmproj always on CPU.", action='store_true')
|
||||||
advparser.add_argument("--visionmaxres", metavar=('[max px]'), help="Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default 1024).", type=int, default=default_visionmaxres)
|
advparser.add_argument("--visionmaxres", metavar=('[max px]'), help="Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default 1024).", type=int, default=default_visionmaxres)
|
||||||
advparser.add_argument("--draftmodel", metavar=('[filename]'), help="Load a small draft model for speculative decoding. It will be fully offloaded. Vocab must match the main model.", default="")
|
advparser.add_argument("--draftmodel", metavar=('[filename]'), help="Load a small draft model for speculative decoding. It will be fully offloaded. Vocab must match the main model.", default="")
|
||||||
advparser.add_argument("--draftamount", metavar=('[tokens]'), help="How many tokens to draft per chunk before verifying results", type=int, default=default_draft_amount)
|
advparser.add_argument("--draftamount", metavar=('[tokens]'), help="How many tokens to draft per chunk before verifying results", type=int, default=default_draft_amount)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue