added override tensor

This commit is contained in:
Concedo 2025-04-20 20:56:17 +08:00
parent 17360a3b32
commit 2ed6850c0b
3 changed files with 59 additions and 3 deletions

View file

@ -181,6 +181,7 @@ class load_model_inputs(ctypes.Structure):
("moe_experts", ctypes.c_int),
("no_bos_token", ctypes.c_bool),
("override_kv", ctypes.c_char_p),
("override_tensors", ctypes.c_char_p),
("flash_attention", ctypes.c_bool),
("tensor_split", ctypes.c_float * tensor_split_max),
("quant_k", ctypes.c_int),
@ -1214,6 +1215,7 @@ def load_model(model_filename):
inputs.moe_experts = args.moeexperts
inputs.no_bos_token = args.nobostoken
inputs.override_kv = args.overridekv.encode("UTF-8") if args.overridekv else "".encode("UTF-8")
inputs.override_tensors = args.overridetensors.encode("UTF-8") if args.overridetensors else "".encode("UTF-8")
inputs = set_backend_props(inputs)
ret = handle.load_model(inputs)
return ret
@ -3868,6 +3870,7 @@ def show_gui():
defaultgenamt_var = ctk.StringVar(value=str(512))
nobostoken_var = ctk.IntVar(value=0)
override_kv_var = ctk.StringVar(value="")
override_tensors_var = ctk.StringVar(value="")
model_var = ctk.StringVar()
lora_var = ctk.StringVar()
@ -4393,8 +4396,9 @@ def show_gui():
qkvslider,qkvlabel,qkvtitle = makeslider(tokens_tab, "Quantize KV Cache:", quantkv_text, quantkv_var, 0, 2, 30, set=0,tooltip="Enable quantization of KV cache.\nRequires FlashAttention for full effect, otherwise only K cache is quantized.")
quantkv_var.trace("w", toggleflashattn)
makecheckbox(tokens_tab, "No BOS Token", nobostoken_var, 43, tooltiptxt="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.")
makelabelentry(tokens_tab, "MoE Experts:", moeexperts_var, row=45, padx=100, singleline=True, tooltip="Override number of MoE experts.")
makelabelentry(tokens_tab, "Override KV:", override_kv_var, row=47, padx=100, singleline=True, width=150, tooltip="Advanced option to override model metadata by key, same as in llama.cpp. Mainly for debugging, not intended for general use. Types: int, float, bool, str")
makelabelentry(tokens_tab, "MoE Experts:", moeexperts_var, row=45, padx=120, singleline=True, tooltip="Override number of MoE experts.")
makelabelentry(tokens_tab, "Override KV:", override_kv_var, row=47, padx=120, singleline=True, width=150, tooltip="Advanced option to override model metadata by key, same as in llama.cpp. Mainly for debugging, not intended for general use. Types: int, float, bool, str")
makelabelentry(tokens_tab, "Override Tensors:", override_tensors_var, row=49, padx=120, singleline=True, width=150, tooltip="Advanced option to override tensor backend selection, same as in llama.cpp.")
# Model Tab
model_tab = tabcontent["Loaded Files"]
@ -4667,6 +4671,7 @@ def show_gui():
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 512
args.nobostoken = (nobostoken_var.get()==1)
args.overridekv = None if override_kv_var.get() == "" else override_kv_var.get()
args.overridetensors = None if override_tensors_var.get() == "" else override_tensors_var.get()
args.chatcompletionsadapter = None if chatcompletionsadapter_var.get() == "" else chatcompletionsadapter_var.get()
try:
if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
@ -4861,6 +4866,8 @@ def show_gui():
nobostoken_var.set(dict["nobostoken"] if ("nobostoken" in dict) else 0)
if "overridekv" in dict and dict["overridekv"]:
override_kv_var.set(dict["overridekv"])
if "overridetensors" in dict and dict["overridetensors"]:
override_tensors_var.set(dict["overridetensors"])
if "blasbatchsize" in dict and dict["blasbatchsize"]:
blas_size_var.set(blasbatchsize_values.index(str(dict["blasbatchsize"])))
@ -6588,6 +6595,7 @@ if __name__ == '__main__':
advparser.add_argument("--nobostoken", help="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.", action='store_true')
advparser.add_argument("--maxrequestsize", metavar=('[size in MB]'), help="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.", type=int, default=32)
advparser.add_argument("--overridekv", metavar=('[name=type:value]'), help="Advanced option to override a metadata by key, same as in llama.cpp. Mainly for debugging, not intended for general use. Types: int, float, bool, str", default="")
advparser.add_argument("--overridetensors", metavar=('[tensor name pattern=buffer type]'), help="Advanced option to override tensor backend selection, same as in llama.cpp.", default="")
compatgroup2 = parser.add_mutually_exclusive_group()
compatgroup2.add_argument("--showgui", help="Always show the GUI instead of launching the model right away when loading settings from a .kcpps file.", action='store_true')
compatgroup2.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')