server: expose prompt token counts in /slots endpoint (#23454)

Add n_prompt_tokens, n_prompt_tokens_processed, and n_prompt_tokens_cache
to the /slots JSON response. These values were already tracked internally
but not exposed, which made it impossible for clients to monitor prompt
evaluation progress while a prompt was being processed.
This commit is contained in:
ScrewTSW 2026-05-21 13:29:13 +02:00 committed by GitHub
parent a1a69f777a
commit b65bb4baae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -506,6 +506,9 @@ struct server_slot {
if (ptask) {
res["id_task"] = ptask->id;
res["n_prompt_tokens"] = (int32_t) prompt.tokens.size();
res["n_prompt_tokens_processed"] = n_prompt_tokens_processed;
res["n_prompt_tokens_cache"] = n_prompt_tokens_cache;
res["params"] = ptask->params.to_json(only_metrics);
res["next_token"] = {
{