mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-22 19:47:49 +00:00
server : print graphs reused in slot timings (#23279)
Add a graphs-reused counter to the per-slot timing output; the value is read from llama_perf_context().
Assisted-by: llama.cpp:local pi
Co-authored-by: ggerganov <ggerganov@users.noreply.github.com>
This commit is contained in:
parent
cd963fee6a
commit
3c81c8deea
1 changed file with 15 additions and 9 deletions
|
|
@@ -467,20 +467,26 @@ struct server_slot {
|
|||
const double n_gen_second = 1e3 / t_token_generation * n_decoded;
|
||||
|
||||
SLT_INF(*this,
|
||||
"\n"
|
||||
"prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n"
|
||||
" eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n"
|
||||
"prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
t_prompt_processing, n_prompt_tokens_processed, t_prompt, n_prompt_second);
|
||||
|
||||
SLT_INF(*this,
|
||||
" eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
t_token_generation, n_decoded, t_gen, n_gen_second);
|
||||
|
||||
SLT_INF(*this,
|
||||
" total time = %10.2f ms / %5d tokens\n",
|
||||
t_prompt_processing, n_prompt_tokens_processed, t_prompt, n_prompt_second,
|
||||
t_token_generation, n_decoded, t_gen, n_gen_second,
|
||||
t_prompt_processing + t_token_generation, n_prompt_tokens_processed + n_decoded);
|
||||
|
||||
SLT_INF(*this,
|
||||
" graphs reused = %10d\n",
|
||||
llama_perf_context(ctx_tgt).n_reused);
|
||||
|
||||
if (n_draft_total > 0) {
|
||||
const float draft_ratio = (float) n_draft_accepted / n_draft_total;
|
||||
SLT_CNT(*this,
|
||||
"draft acceptance rate = %0.5f (%5d accepted / %5d generated)\n",
|
||||
draft_ratio, n_draft_accepted, n_draft_total
|
||||
);
|
||||
SLT_INF(*this,
|
||||
"draft acceptance = %0.5f (%5d accepted / %5d generated)\n",
|
||||
draft_ratio, n_draft_accepted, n_draft_total);
|
||||
}
|
||||
|
||||
common_speculative_print_stats(spec);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue