mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-07 00:41:50 +00:00
support multiple override kv
This commit is contained in:
parent
fef73919ea
commit
68c9d955d2
3 changed files with 30 additions and 14 deletions
3
expose.h
3
expose.h
|
|
@ -5,6 +5,7 @@ const int tensor_split_max = 16;
|
|||
const int images_max = 8;
|
||||
const int audio_max = 4;
|
||||
const int logprobs_max = 5;
|
||||
const int overridekv_max = 4;
|
||||
|
||||
// match kobold's sampler list and order
|
||||
enum samplers
|
||||
|
|
@ -65,7 +66,7 @@ struct load_model_inputs
|
|||
const int moecpu = 0;
|
||||
const bool no_bos_token = false;
|
||||
const bool load_guidance = false;
|
||||
const char * override_kv = nullptr;
|
||||
const char * override_kv[overridekv_max] = {};
|
||||
const char * override_tensors = nullptr;
|
||||
const bool flash_attention = false;
|
||||
const float tensor_split[tensor_split_max] = {};
|
||||
|
|
|
|||
|
|
@ -2281,14 +2281,18 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
kvo.val_i64 = inputs.moe_experts;
|
||||
kvos.push_back(kvo);
|
||||
}
|
||||
std::string override_kv = inputs.override_kv;
|
||||
if(override_kv != "" && file_format==FileFormat::GGUF_GENERIC)
|
||||
for(int x=0;x<overridekv_max;++x)
|
||||
{
|
||||
printf("\nAttempting to apply KV override: %s...\n",override_kv.c_str());
|
||||
bool kvo_ok = string_parse_kv_override(override_kv.c_str(),kvos);
|
||||
LLAMA_LOG_INFO("\nKV override parse: %s\n",(kvo_ok?"success":"failed"));
|
||||
fflush(stdout);
|
||||
std::string override_kv = inputs.override_kv[x];
|
||||
if(override_kv != "" && file_format==FileFormat::GGUF_GENERIC)
|
||||
{
|
||||
printf("\nAttempting to apply KV override: %s...\n",override_kv.c_str());
|
||||
bool kvo_ok = string_parse_kv_override(override_kv.c_str(),kvos);
|
||||
LLAMA_LOG_INFO("\nKV override parse: %s\n",(kvo_ok?"success":"failed"));
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if(kvos.size()>0)
|
||||
{
|
||||
kvos.emplace_back();
|
||||
|
|
|
|||
25
koboldcpp.py
25
koboldcpp.py
|
|
@ -55,13 +55,14 @@ net_save_slots = 12
|
|||
savestate_limit = 3 #3 savestate slots
|
||||
default_vae_tile_threshold = 768
|
||||
default_native_ctx = 16384
|
||||
overridekv_max = 4
|
||||
|
||||
# abuse prevention
|
||||
stop_token_max = 256
|
||||
ban_token_max = 768
|
||||
logit_bias_max = 512
|
||||
dry_seq_break_max = 128
|
||||
extra_images_max = 4
|
||||
extra_images_max = 4 # for kontext/qwen img
|
||||
|
||||
# global vars
|
||||
KcppVersion = "1.101"
|
||||
|
|
@ -201,7 +202,7 @@ class load_model_inputs(ctypes.Structure):
|
|||
("moecpu", ctypes.c_int),
|
||||
("no_bos_token", ctypes.c_bool),
|
||||
("load_guidance", ctypes.c_bool),
|
||||
("override_kv", ctypes.c_char_p),
|
||||
("override_kv", ctypes.c_char_p * overridekv_max),
|
||||
("override_tensors", ctypes.c_char_p),
|
||||
("flash_attention", ctypes.c_bool),
|
||||
("tensor_split", ctypes.c_float * tensor_split_max),
|
||||
|
|
@ -1446,7 +1447,17 @@ def load_model(model_filename):
|
|||
inputs.moe_experts = args.moeexperts
|
||||
inputs.no_bos_token = args.nobostoken
|
||||
inputs.load_guidance = args.enableguidance
|
||||
inputs.override_kv = args.overridekv.encode("UTF-8") if args.overridekv else "".encode("UTF-8")
|
||||
okv = []
|
||||
if args.overridekv and str(args.overridekv).count(",")>0 and str(args.overridekv).count("=")>1 and str(args.overridekv).count(":")==str(args.overridekv).count("="):
|
||||
okv = [x.strip() for x in str(args.overridekv).split(",")]
|
||||
okv = [item for item in okv if item and item.strip()]
|
||||
elif args.overridekv:
|
||||
okv = [args.overridekv]
|
||||
for n in range(overridekv_max):
|
||||
if not okv or n >= len(okv):
|
||||
inputs.override_kv[n] = "".encode("UTF-8")
|
||||
else:
|
||||
inputs.override_kv[n] = okv[n].encode("UTF-8")
|
||||
inputs.override_tensors = args.overridetensors.encode("UTF-8") if args.overridetensors else "".encode("UTF-8")
|
||||
inputs.moecpu = (200 if args.moecpu > 200 else args.moecpu)
|
||||
inputs.check_slowness = (not args.highpriority and os.name == 'nt' and 'Intel' in platform.processor())
|
||||
|
|
@ -5454,8 +5465,8 @@ def show_gui():
|
|||
makecheckbox(tokens_tab, "Enable Guidance", enableguidance_var, 43,padx=(200 if corrupt_scaler else 140), tooltiptxt="Enables the use of Classifier-Free-Guidance, which allows the use of negative prompts. Has performance and memory impact.")
|
||||
makelabelentry(tokens_tab, "MoE Experts:", moeexperts_var, row=55, padx=(220 if corrupt_scaler else 120), singleline=True, tooltip="Override number of MoE experts.")
|
||||
makelabelentry(tokens_tab, "MoE CPU Layers:", moecpu_var, row=55, padx=(490 if corrupt_scaler else 320), singleline=True, tooltip="Keep Mixture of Experts (MoE) weights of the first N layers in the CPU.", labelpadx=(300 if corrupt_scaler else 210))
|
||||
makelabelentry(tokens_tab, "Override KV:", override_kv_var, row=57, padx=(220 if corrupt_scaler else 120), singleline=True, width=150, tooltip="Advanced option to override model metadata by key, same as in llama.cpp. Mainly for debugging, not intended for general use. Types: int, float, bool, str")
|
||||
makelabelentry(tokens_tab, "Override Tensors:", override_tensors_var, row=59, padx=(220 if corrupt_scaler else 120), singleline=True, width=150, tooltip="Advanced option to override tensor backend selection, same as in llama.cpp.")
|
||||
makelabelentry(tokens_tab, "Override KV:", override_kv_var, row=57, padx=(220 if corrupt_scaler else 120), singleline=True, width=150, tooltip="Override metadata value by key. Separate multiple values with commas. Format is name=type:value. Types: int, float, bool, str")
|
||||
makelabelentry(tokens_tab, "Override Tensors:", override_tensors_var, row=59, padx=(220 if corrupt_scaler else 120), singleline=True, width=150, tooltip="Override selected backend for specific tensors matching tensor_name_regex_pattern=buffer_type, same as in llama.cpp.")
|
||||
|
||||
# Model Tab
|
||||
model_tab = tabcontent["Loaded Files"]
|
||||
|
|
@ -7885,8 +7896,8 @@ if __name__ == '__main__':
|
|||
advparser.add_argument("--nobostoken", help="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.", action='store_true')
|
||||
advparser.add_argument("--enableguidance", help="Enables the use of Classifier-Free-Guidance, which allows the use of negative prompts. Has performance and memory impact.", action='store_true')
|
||||
advparser.add_argument("--maxrequestsize", metavar=('[size in MB]'), help="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.", type=int, default=32)
|
||||
advparser.add_argument("--overridekv","--override-kv", metavar=('[name=type:value]'), help="Advanced option to override a metadata by key, same as in llama.cpp. Mainly for debugging, not intended for general use. Types: int, float, bool, str", default="")
|
||||
advparser.add_argument("--overridetensors","--override-tensor","-ot", metavar=('[tensor name pattern=buffer type]'), help="Advanced option to override tensor backend selection, same as in llama.cpp.", default="")
|
||||
advparser.add_argument("--overridekv","--override-kv", metavar=('[name=type:value]'), help="Override metadata value by key. Separate multiple values with commas. Format is name=type:value. Types: int, float, bool, str", default="")
|
||||
advparser.add_argument("--overridetensors","--override-tensor","-ot", metavar=('[tensor name pattern=buffer type]'), help="Override selected backend for specific tensors matching tensor_name_regex_pattern=buffer_type, same as in llama.cpp.", default="")
|
||||
compatgroup2 = parser.add_mutually_exclusive_group()
|
||||
compatgroup2.add_argument("--showgui", help="Always show the GUI instead of launching the model right away when loading settings from a .kcpps file.", action='store_true')
|
||||
compatgroup2.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher. Overrides showgui.", action='store_true')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue