Added more perf stats (last input token count, plus derived processing/eval times and speeds)

2025-09-10 17:14:36 +00:00 · 2025-06-21 12:12:28 +08:00 · 2025-06-21 12:12:28 +08:00 · 65ff041827
commit 65ff041827
parent ea21a9d749
4 changed files with 39 additions and 2 deletions
--- a/expose.cpp
+++ b/expose.cpp
@ -278,6 +278,9 @@ extern "C"
    int get_last_token_count() {
        return last_token_count;
    }
+    int get_last_input_count() { // C-linkage accessor: returns the current value of the last_input_count global.
+        return last_input_count;
+    }
    int get_last_seed()
    {
        return last_seed;
--- a/expose.h
+++ b/expose.h
@ -286,6 +286,7 @@ extern bool generation_finished;
 extern float last_eval_time;
 extern float last_process_time;
 extern int last_token_count;
+extern int last_input_count;
 extern int last_seed;
 extern int total_gens;
 extern int total_img_gens;
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@ -59,6 +59,7 @@ bool generation_finished;
 float last_process_time = 0;
 float last_eval_time = 0;
 int last_token_count = 0;
+int last_input_count = 0; // Written in gpttype_generate from finaltokcount (negative values stored as 0); read through get_last_input_count().
 int last_seed = -1;
 int total_gens = 0;
 int last_draft_success = 0;
@ -1596,7 +1597,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
    for (auto reject: llama_grammar_reject_candidates(grammar->rules, grammar->stacks, candidates_grammar)) {
        rejects[reject.index] = true;
    }
-    
+
    auto first = candidates->data;
    auto last  = first + candidates->size;
    last = std::remove_if(first, last,
@ -4318,6 +4319,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
    last_eval_time = pt2;
    last_process_time = pt1;
    last_token_count = realnpredict;
+    last_input_count = (finaltokcount<0?0:finaltokcount); // Clamp so a negative finaltokcount is recorded as 0.
    last_seed = kcpp_data->seed;
    last_draft_failed = draft_failures;
    last_draft_success = draft_successes;
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -525,6 +525,7 @@ def init_library():
    handle.get_last_eval_time.restype = ctypes.c_float
    handle.get_last_process_time.restype = ctypes.c_float
    handle.get_last_token_count.restype = ctypes.c_int
+    handle.get_last_input_count.restype = ctypes.c_int
    handle.get_last_seed.restype = ctypes.c_int
    handle.get_last_draft_success.restype = ctypes.c_int
    handle.get_last_draft_failed.restype = ctypes.c_int
@ -3017,6 +3018,7 @@ Change Mode<br>
            lastp = handle.get_last_process_time()
            laste = handle.get_last_eval_time()
            lastc = handle.get_last_token_count()
+            lastic = handle.get_last_input_count()
            totalgens = handle.get_total_gens()
            totalimggens = handle.get_total_img_gens()
            totalttsgens = handle.get_total_tts_gens()
@ -3025,10 +3027,39 @@ Change Mode<br>
            lastseed = handle.get_last_seed()
            lastdraftsuccess = handle.get_last_draft_success()
            lastdraftfailed = handle.get_last_draft_failed()
+            t_pp = float(lastp)*float(lastic)*0.001 # last_process_time x last_input_count, scaled by 1/1000; presumably ms-per-token x tokens -> total seconds (TODO confirm units)
+            t_gen = float(laste)*float(lastc)*0.001 # last_eval_time x last_token_count, scaled by 1/1000; same assumed units as t_pp (TODO confirm)
+            s_pp = float(lastic)/t_pp if t_pp>0 else 0 # input-count per unit of t_pp; falls back to 0 when t_pp is not positive, avoiding division by zero
+            s_gen = float(lastc)/t_gen if t_gen>0 else 0 # token-count per unit of t_gen; falls back to 0 when t_gen is not positive, avoiding division by zero
            uptime = time.time() - start_time
            idletime = time.time() - last_req_time
            is_quiet = True if (args.quiet and args.debugmode != 1) else False
-            response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "last_seed":lastseed, "last_draft_success":lastdraftsuccess, "last_draft_failed":lastdraftfailed, "total_gens":totalgens, "stop_reason":stopreason, "total_img_gens":totalimggens, "total_tts_gens":totalttsgens, "total_transcribe_gens":totaltranscribegens, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1), "hordeexitcounter":exitcounter, "uptime":uptime, "idletime":idletime, "quiet":is_quiet}).encode())
+            response_body = json.dumps(
+                {
+                    "last_process": lastp,
+                    "last_eval": laste,
+                    "last_token_count": lastc,
+                    "last_input_count": lastic,
+                    "last_process_time": t_pp,
+                    "last_eval_time": t_gen,
+                    "last_process_speed": s_pp,
+                    "last_eval_speed": s_gen,
+                    "last_seed": lastseed,
+                    "last_draft_success": lastdraftsuccess,
+                    "last_draft_failed": lastdraftfailed,
+                    "total_gens": totalgens,
+                    "stop_reason": stopreason,
+                    "total_img_gens": totalimggens,
+                    "total_tts_gens": totalttsgens,
+                    "total_transcribe_gens": totaltranscribegens,
+                    "queue": requestsinqueue,
+                    "idle": (0 if modelbusy.locked() else 1),
+                    "hordeexitcounter": exitcounter,
+                    "uptime": uptime,
+                    "idletime": idletime,
+                    "quiet": is_quiet,
+                }
+            ).encode()

        elif self.path.endswith('/api/extra/generate/check'):
            if not self.secure_endpoint():