Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-10 09:04:36 +00:00
default speculative set to 8. added more adapter fields

commit 32ac3153e4 (parent 5353bfa983)
3 changed files with 14 additions and 13 deletions
expose.h (2)

@@ -41,7 +41,7 @@ struct load_model_inputs
     const char * lora_filename = nullptr;
     const char * lora_base = nullptr;
     const char * draftmodel_filename = nullptr;
-    const int draft_amount = 12;
+    const int draft_amount = 8;
     const char * mmproj_filename = nullptr;
     const bool use_mmap = false;
     const bool use_mlock = false;
@@ -53,7 +53,7 @@ std::string lora_filename = "";
 std::string lora_base = "";
 std::string mmproj_filename = "";
 std::string draftmodel_filename = "";
-int speculative_chunk_amt = 12; //do it in chunks of this many tokens
+int speculative_chunk_amt = 8; //do it in chunks of this many tokens
 bool generation_finished;
 float last_process_time = 0;
 float last_eval_time = 0;
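For context, `draft_amount` in the load inputs and `speculative_chunk_amt` in the runtime state appear to be the same knob: how many tokens the draft model proposes per round before the main model verifies them (per the tooltip later in this diff, "How many tokens to draft per chunk before verifying results"). A smaller chunk wastes less draft work when the two models diverge early, at the cost of more verification rounds, which is the trade-off behind moving the default from 12 to 8. A minimal greedy sketch of the idea, not koboldcpp's actual code: `draft_next` and `main_next` are hypothetical stand-ins for the two models, and a real implementation verifies the whole drafted chunk in one batched forward pass of the main model rather than token by token.

def speculative_generate(draft_next, main_next, prompt, n_tokens, chunk=8):
    out = list(prompt)
    while len(out) - len(prompt) < n_tokens:
        # 1. Cheaply draft `chunk` candidate tokens with the small model.
        drafted = []
        for _ in range(chunk):
            drafted.append(draft_next(out + drafted))
        # 2. Verify with the main model, keeping only the agreeing prefix.
        for tok in drafted:
            verified = main_next(out)
            out.append(verified)
            if verified != tok:
                break  # diverged: the rest of the chunk is wasted work
    return out[len(prompt):]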
koboldcpp.py (23)

@@ -24,6 +24,7 @@ images_max = 4
 bias_min_value = -100.0
 bias_max_value = 100.0
 logprobs_max = 5
+default_draft_amount = 8

 # abuse prevention
 stop_token_max = 256
@@ -1385,7 +1386,9 @@ def transform_genparams(genparams, api_format):
         pass

     elif api_format==3 or api_format==4 or api_format==7:
-        default_max_tok = (400 if api_format==4 else 200)
+        default_adapter = {} if chatcompl_adapter is None else chatcompl_adapter
+        adapter_obj = genparams.get('adapter', default_adapter)
+        default_max_tok = (adapter_obj.get("max_length", 512) if (api_format==4 or api_format==7) else 200)
         genparams["max_length"] = genparams.get('max_tokens', genparams.get('max_completion_tokens', default_max_tok))
         presence_penalty = genparams.get('presence_penalty', genparams.get('frequency_penalty', 0.0))
         genparams["presence_penalty"] = presence_penalty
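This is the "more adapter fields" half of the commit: the adapter JSON's `max_length` now seeds the default response length for ChatCompletions-style requests (api_format 4 and 7), while an explicit `max_tokens` or `max_completion_tokens` in the request still wins, and api_format 3 keeps a default of 200. A standalone restatement of the resolution order; the function name and test values here are invented for illustration.

def resolve_max_length(genparams, adapter_obj, api_format):
    # Default comes from the adapter's max_length field (512 if absent),
    # except api_format 3, which keeps its old default of 200.
    default_max_tok = (adapter_obj.get("max_length", 512)
                       if (api_format == 4 or api_format == 7) else 200)
    # An explicit max_tokens (or max_completion_tokens) always wins.
    return genparams.get('max_tokens',
                         genparams.get('max_completion_tokens', default_max_tok))

assert resolve_max_length({}, {}, 4) == 512
assert resolve_max_length({}, {"max_length": 1024}, 4) == 1024
assert resolve_max_length({}, {"max_length": 1024}, 3) == 200
assert resolve_max_length({"max_tokens": 64}, {"max_length": 1024}, 4) == 64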
@ -1401,8 +1404,6 @@ def transform_genparams(genparams, api_format):
|
||||||
if api_format==4 or api_format==7: #handle ollama chat here too
|
if api_format==4 or api_format==7: #handle ollama chat here too
|
||||||
# translate openai chat completion messages format into one big string.
|
# translate openai chat completion messages format into one big string.
|
||||||
messages_array = genparams.get('messages', [])
|
messages_array = genparams.get('messages', [])
|
||||||
default_adapter = {} if chatcompl_adapter is None else chatcompl_adapter
|
|
||||||
adapter_obj = genparams.get('adapter', default_adapter)
|
|
||||||
messages_string = ""
|
messages_string = ""
|
||||||
system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
|
system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
|
||||||
system_message_end = adapter_obj.get("system_end", "")
|
system_message_end = adapter_obj.get("system_end", "")
|
||||||
|
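With `adapter_obj` now computed once, earlier in the function, a single adapter file can carry both the instruct tags used in this branch and the new `max_length` default. A hypothetical adapter, shown as the Python dict its JSON parses into: the keys come from the `adapter_obj.get(...)` calls in this file, but every value is invented for illustration.

example_adapter = {
    "system_start": "<|im_start|>system\n",  # wraps the system message
    "system_end": "<|im_end|>\n",
    "max_length": 1024,  # new: default response length for ChatCompletions requests
}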
@@ -2764,7 +2765,7 @@ def show_gui():
     preloadstory_var = ctk.StringVar()
     mmproj_var = ctk.StringVar()
     draftmodel_var = ctk.StringVar()
-    draftamount_var = ctk.StringVar(value="12")
+    draftamount_var = ctk.StringVar(value=str(default_draft_amount))
     nomodel = ctk.IntVar(value=0)

     port_var = ctk.StringVar(value=defaultport)
@ -2871,8 +2872,8 @@ def show_gui():
|
||||||
button = ctk.CTkButton(parent, 50, text="Browse", command= lambda a=var,b=searchtext:getfilename(a,b))
|
button = ctk.CTkButton(parent, 50, text="Browse", command= lambda a=var,b=searchtext:getfilename(a,b))
|
||||||
if singlerow:
|
if singlerow:
|
||||||
if singlecol:
|
if singlecol:
|
||||||
entry.grid(row=row, column=0, padx=(84+8), stick="w")
|
entry.grid(row=row, column=0, padx=(94+8), stick="w")
|
||||||
button.grid(row=row, column=0, padx=(84+width+12), stick="nw")
|
button.grid(row=row, column=0, padx=(94+width+12), stick="nw")
|
||||||
else:
|
else:
|
||||||
entry.grid(row=row, column=1, padx=8, stick="w")
|
entry.grid(row=row, column=1, padx=8, stick="w")
|
||||||
button.grid(row=row, column=1, padx=(width+12), stick="nw")
|
button.grid(row=row, column=1, padx=(width+12), stick="nw")
|
||||||
|
@@ -3248,10 +3249,10 @@ def show_gui():
     makefileentry(model_tab, "Text Model:", "Select GGUF or GGML Model File", model_var, 1,width=280,singlerow=True, onchoosefile=on_picked_model_file,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
     makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=280,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
     makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5,width=280,singlerow=True,tooltiptxt="Select an optional F16 GGML Text LoRA base file to use.\nLeave blank to skip.")
-    makefileentry(model_tab, "Vision mmproj:", "Select Vision mmproj File", mmproj_var, 7,width=280,tooltiptxt="Select a mmproj file to use for vision models like LLaVA.\nLeave blank to skip.")
-    makefileentry(model_tab, "Speculative Model:", "Select Draft Text Model File", draftmodel_var, 9,width=280,tooltiptxt="Select a draft text model file to use for speculative decoding.\nLeave blank to skip.")
+    makefileentry(model_tab, "Vision mmproj:", "Select Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select a mmproj file to use for vision models like LLaVA.\nLeave blank to skip.")
+    makefileentry(model_tab, "Draft Model:", "Select Speculative Text Model File", draftmodel_var, 9,width=280,singlerow=True,tooltiptxt="Select a draft text model file to use for speculative decoding.\nLeave blank to skip.")
     makelabelentry(model_tab, "Draft Amount: ", draftamount_var, 11, 50,padx=100,singleline=True,tooltip="How many tokens to draft per chunk before verifying results")
-    makefileentry(model_tab, "Preloaded Story:", "Select Preloaded Story File", preloadstory_var, 15,width=280,tooltiptxt="Select an optional KoboldAI JSON savefile \nto be served on launch to any client.")
+    makefileentry(model_tab, "Preload Story:", "Select Preloaded Story File", preloadstory_var, 15,width=280,singlerow=True,tooltiptxt="Select an optional KoboldAI JSON savefile \nto be served on launch to any client.")
     makefileentry(model_tab, "ChatCompletions Adapter:", "Select ChatCompletions Adapter File", chatcompletionsadapter_var, 24, width=250, filetypes=[("JSON Adapter", "*.json")], tooltiptxt="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.")
     def pickpremadetemplate():
         initialDir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'kcpp_adapters')
@@ -3508,7 +3509,7 @@ def show_gui():
             pass
         args.mmproj = None if mmproj_var.get() == "" else mmproj_var.get()
         args.draftmodel = None if draftmodel_var.get() == "" else draftmodel_var.get()
-        args.draftamount = int(draftamount_var.get()) if draftamount_var.get()!="" else 12
+        args.draftamount = int(draftamount_var.get()) if draftamount_var.get()!="" else default_draft_amount

         args.ssl = None if (ssl_cert_var.get() == "" or ssl_key_var.get() == "") else ([ssl_cert_var.get(), ssl_key_var.get()])
         args.password = None if (password_var.get() == "") else (password_var.get())
@@ -4951,7 +4952,7 @@ if __name__ == '__main__':
     advparser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
     advparser.add_argument("--mmproj", help="Select a multimodal projector file for vision models like LLaVA.", default="")
     advparser.add_argument("--draftmodel", help="Load a small draft model for speculative decoding. It will be fully offloaded. Vocab must match the main model.", default="")
-    advparser.add_argument("--draftamount", metavar=('[tokens]'), help="How many tokens to draft per chunk before verifying results", type=int, default=12)
+    advparser.add_argument("--draftamount", metavar=('[tokens]'), help="How many tokens to draft per chunk before verifying results", type=int, default=default_draft_amount)
     advparser.add_argument("--password", help="Enter a password required to use this instance. This key will be required for all text endpoints. Image endpoints are not secured.", default=None)
     advparser.add_argument("--ignoremissing", help="Ignores all missing non-essential files, just skipping them instead.", action='store_true')
     advparser.add_argument("--chatcompletionsadapter", help="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.", default="")
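Because the argparse default now points at the module-level constant, the CLI flag, the GUI field, and the launcher fallback all move together if `default_draft_amount` ever changes again. A self-contained sketch of the flag's behavior (just this one argument, not the full parser):

import argparse

default_draft_amount = 8  # module-level constant introduced by this commit

advparser = argparse.ArgumentParser()
advparser.add_argument("--draftamount", metavar='[tokens]', type=int,
                       default=default_draft_amount,
                       help="How many tokens to draft per chunk before verifying results")

print(advparser.parse_args([]).draftamount)                       # 8 (new default)
print(advparser.parse_args(["--draftamount", "16"]).draftamount)  # 16 (explicit override)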