mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-11 09:34:39 +00:00
llama-server: fix k-shift when output overlength
This commit is contained in:
parent
f032680cab
commit
bdf9d8e74b
3 changed files with 21 additions and 14 deletions
|
@ -2037,6 +2037,13 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
|||
params.chat_template = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
|
||||
add_opt(llama_arg(
|
||||
{"-sys", "--system-prompt"}, "PROMPT",
|
||||
"system prompt to use with model (if applicable, depending on chat template)",
|
||||
[](gpt_params & params, const std::string & value) {
|
||||
params.system_prompt = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SYSTEM_PROMPT"));
|
||||
add_opt(llama_arg(
|
||||
{"-sps", "--slot-prompt-similarity"}, "SIMILARITY",
|
||||
format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue