mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-11 09:34:39 +00:00
Add speculative decoding support to the server and command-line interfaces
This commit is contained in:
parent
1ea2d61a97
commit
2e8e42a5ad
11 changed files with 591 additions and 31 deletions
|
@ -1704,9 +1704,9 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
{"-md", "--model-draft"}, "FNAME",
|
||||
"draft model for speculative decoding (default: unused)",
|
||||
[](gpt_params & params, const std::string & value) {
|
||||
params.model_draft = value;
|
||||
params.speculative.model = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
||||
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(llama_arg(
|
||||
{"-mu", "--model-url"}, "MODEL_URL",
|
||||
"model download url (default: unused)",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue