mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 18:30:50 +00:00
common : more accurate sampling timing (#17382)
* common : more accurate sampling timing * eval-callback : minor fixes * cont : add time_meas impl * cont : fix log msg [no ci] * cont : fix multiple definitions of time_meas * llama-cli : exclude chat template init from time measurement * cont : print percentage of unaccounted time * cont : do not reset timings
This commit is contained in:
parent
5088b435d4
commit
196f5083ef
7 changed files with 102 additions and 33 deletions
|
|
@ -472,9 +472,6 @@ static void llama_sampler_chain_reset(struct llama_sampler * smpl) {
|
|||
for (auto * smpl : chain->samplers) {
|
||||
llama_sampler_reset(smpl);
|
||||
}
|
||||
|
||||
chain->t_sample_us = 0;
|
||||
chain->n_sample = 0;
|
||||
}
|
||||
|
||||
static struct llama_sampler * llama_sampler_chain_clone(const struct llama_sampler * smpl) {
|
||||
|
|
@ -2670,8 +2667,7 @@ struct llama_perf_sampler_data llama_perf_sampler(const struct llama_sampler * c
|
|||
void llama_perf_sampler_print(const struct llama_sampler * chain) {
|
||||
const auto data = llama_perf_sampler(chain);
|
||||
|
||||
LLAMA_LOG_INFO("%s: sampling time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, data.t_sample_ms, data.n_sample, data.t_sample_ms / data.n_sample, 1e3 / data.t_sample_ms * data.n_sample);
|
||||
LLAMA_LOG_INFO("%s: samplers time = %10.2f ms / %5d runs\n", __func__, data.t_sample_ms, data.n_sample);
|
||||
}
|
||||
|
||||
void llama_perf_sampler_reset(struct llama_sampler * chain) {
|
||||
|
|
@ -2681,5 +2677,6 @@ void llama_perf_sampler_reset(struct llama_sampler * chain) {
|
|||
|
||||
auto * ctx = (struct llama_sampler_chain *) chain->ctx;
|
||||
|
||||
ctx->t_sample_us = ctx->n_sample = 0;
|
||||
ctx->t_sample_us = 0;
|
||||
ctx->n_sample = 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue