mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
default to 12 tokens drafted
This commit is contained in:
parent
b21d0fe3ac
commit
e0c59486ee
3 changed files with 9 additions and 6 deletions
2
expose.h
2
expose.h
|
@ -41,7 +41,7 @@ struct load_model_inputs
|
|||
const char * lora_filename = nullptr;
|
||||
const char * lora_base = nullptr;
|
||||
const char * draftmodel_filename = nullptr;
|
||||
const int draft_amount = 16;
|
||||
const int draft_amount = 12;
|
||||
const char * mmproj_filename = nullptr;
|
||||
const bool use_mmap = false;
|
||||
const bool use_mlock = false;
|
||||
|
|
|
@ -53,7 +53,7 @@ std::string lora_filename = "";
|
|||
std::string lora_base = "";
|
||||
std::string mmproj_filename = "";
|
||||
std::string draftmodel_filename = "";
|
||||
int speculative_chunk_amt = 16; //do it in chunks of this many tokens
|
||||
int speculative_chunk_amt = 12; //do it in chunks of this many tokens
|
||||
bool generation_finished;
|
||||
float last_process_time = 0;
|
||||
float last_eval_time = 0;
|
||||
|
@ -680,7 +680,10 @@ static speculative_draft_result speculative_decoding_eval_chunk(llama_context *
|
|||
++draft_npast;
|
||||
}
|
||||
//now that we have our drafted tokens, we form a batch and PP it
|
||||
kcpp_embd_batch batch2 = kcpp_embd_batch(drafted_ids, actual_npast, true);
|
||||
|
||||
std::vector<int> real_embd = drafted_ids;
|
||||
real_embd.pop_back();
|
||||
kcpp_embd_batch batch2 = kcpp_embd_batch(real_embd, actual_npast, true);
|
||||
auto draftok = (llama_decode(main_ctx, batch2.batch)==0); //actual eval for big model
|
||||
if(!draftok)
|
||||
{
|
||||
|
|
|
@ -2764,7 +2764,7 @@ def show_gui():
|
|||
preloadstory_var = ctk.StringVar()
|
||||
mmproj_var = ctk.StringVar()
|
||||
draftmodel_var = ctk.StringVar()
|
||||
draftamount_var = ctk.StringVar(value="16")
|
||||
draftamount_var = ctk.StringVar(value="12")
|
||||
nomodel = ctk.IntVar(value=0)
|
||||
|
||||
port_var = ctk.StringVar(value=defaultport)
|
||||
|
@ -3508,7 +3508,7 @@ def show_gui():
|
|||
pass
|
||||
args.mmproj = None if mmproj_var.get() == "" else mmproj_var.get()
|
||||
args.draftmodel = None if draftmodel_var.get() == "" else draftmodel_var.get()
|
||||
args.draftamount = int(draftamount_var.get()) if draftamount_var.get()!="" else 16
|
||||
args.draftamount = int(draftamount_var.get()) if draftamount_var.get()!="" else 12
|
||||
|
||||
args.ssl = None if (ssl_cert_var.get() == "" or ssl_key_var.get() == "") else ([ssl_cert_var.get(), ssl_key_var.get()])
|
||||
args.password = None if (password_var.get() == "") else (password_var.get())
|
||||
|
@ -4951,7 +4951,7 @@ if __name__ == '__main__':
|
|||
advparser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
|
||||
advparser.add_argument("--mmproj", help="Select a multimodal projector file for vision models like LLaVA.", default="")
|
||||
advparser.add_argument("--draftmodel", help="Load a small draft model for speculative decoding. It will be fully offloaded. Vocab must match the main model.", default="")
|
||||
advparser.add_argument("--draftamount", metavar=('[tokens]'), help="How many tokens to draft per chunk before verifying results", type=int, default=16)
|
||||
advparser.add_argument("--draftamount", metavar=('[tokens]'), help="How many tokens to draft per chunk before verifying results", type=int, default=12)
|
||||
advparser.add_argument("--password", help="Enter a password required to use this instance. This key will be required for all text endpoints. Image endpoints are not secured.", default=None)
|
||||
advparser.add_argument("--ignoremissing", help="Ignores all missing non-essential files, just skipping them instead.", action='store_true')
|
||||
advparser.add_argument("--chatcompletionsadapter", help="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.", default="")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue