mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-06-01 22:50:53 +00:00
server: fix checkpoints creation (#22929)
* common : add common_chat_split_by_role
* cont : fix spans to reach end of message
* server: fix checkpoints creation
  - extract message_spans from chat templates
  - find the prompt token position before the latest user message
  - split prompt batching at that position
  - create a context checkpoint before the latest user input
  - avoid periodic mid-prompt checkpoints when that position is known
  - handle multimodal prompts when mapping text/template positions to server prompt tokens
  - add --checkpoint-min-step to control minimum spacing between checkpoints
* cont : clean-up
* Support autoparser detection for message barriers
* server: fix message span delimiter and update docs

---------

Co-authored-by: Alde Rojas <hello@alde.dev>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Piotr Wilkin <piotr.wilkin@syndatis.com>
This commit is contained in:
parent
6d57c26ef8
commit
e2ef8fe42c
15 changed files with 586 additions and 37 deletions
|
|
@@ -1334,12 +1334,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
}
|
||||
).set_env("LLAMA_ARG_CTX_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
|
||||
add_opt(common_arg(
|
||||
{"-cpent", "--checkpoint-every-n-tokens"}, "N",
|
||||
string_format("create a checkpoint every n tokens during prefill (processing), -1 to disable (default: %d)", params.checkpoint_every_nt),
|
||||
{"-cms", "--checkpoint-min-step"}, "N",
|
||||
string_format("minimum spacing between context checkpoints in tokens (default: %d, 0 = no minimum)", params.checkpoint_min_step),
|
||||
[](common_params & params, int value) {
|
||||
params.checkpoint_every_nt = value;
|
||||
if (value < 0) {
|
||||
throw std::invalid_argument("checkpoint-min-step must be non-negative");
|
||||
}
|
||||
params.checkpoint_min_step = value;
|
||||
}
|
||||
).set_env("LLAMA_ARG_CHECKPOINT_EVERY_NT").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
|
||||
).set_env("LLAMA_ARG_CHECKPOINT_MIN_SPACING_NT").set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(common_arg(
|
||||
{"-cram", "--cache-ram"}, "N",
|
||||
string_format("set the maximum cache size in MiB (default: %d, -1 - no limit, 0 - disable)"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue