From b65bb4baaeae712975e09a92e1d37d3842ea5da7 Mon Sep 17 00:00:00 2001 From: ScrewTSW Date: Thu, 21 May 2026 13:29:13 +0200 Subject: [PATCH] server: expose prompt token counts in /slots endpoint (#23454) Add n_prompt_tokens, n_prompt_tokens_processed, and n_prompt_tokens_cache to the /slots JSON response. These fields are already tracked internally but were not exposed, making it impossible for clients to monitor prompt evaluation progress during processing. --- tools/server/server-context.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 80d77b0c0..b939e3b75 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -506,6 +506,9 @@ struct server_slot { if (ptask) { res["id_task"] = ptask->id; + res["n_prompt_tokens"] = (int32_t) prompt.tokens.size(); + res["n_prompt_tokens_processed"] = n_prompt_tokens_processed; + res["n_prompt_tokens_cache"] = n_prompt_tokens_cache; res["params"] = ptask->params.to_json(only_metrics); res["next_token"] = { {