diff --git a/expose.cpp b/expose.cpp
index 1a9b2437e..34320756b 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -278,6 +278,9 @@ extern "C"
     int get_last_token_count() {
         return last_token_count;
     }
+    int get_last_input_count() {
+        return last_input_count;
+    }
     int get_last_seed() {
         return last_seed;
     }
diff --git a/expose.h b/expose.h
index 25527ed1c..5a56ba816 100644
--- a/expose.h
+++ b/expose.h
@@ -286,6 +286,7 @@ extern bool generation_finished;
 extern float last_eval_time;
 extern float last_process_time;
 extern int last_token_count;
+extern int last_input_count;
 extern int last_seed;
 extern int total_gens;
 extern int total_img_gens;
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index bc0437056..ad9d7f922 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -59,6 +59,7 @@ bool generation_finished;
 float last_process_time = 0;
 float last_eval_time = 0;
 int last_token_count = 0;
+int last_input_count = 0;
 int last_seed = -1;
 int total_gens = 0;
 int last_draft_success = 0;
@@ -1596,7 +1597,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
     for (auto reject: llama_grammar_reject_candidates(grammar->rules, grammar->stacks, candidates_grammar)) {
         rejects[reject.index] = true;
     }
-    
+
     auto first = candidates->data;
     auto last = first + candidates->size;
     last = std::remove_if(first, last,
@@ -4318,6 +4319,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     last_eval_time = pt2;
     last_process_time = pt1;
     last_token_count = realnpredict;
+    last_input_count = (finaltokcount<0?0:finaltokcount);
     last_seed = kcpp_data->seed;
     last_draft_failed = draft_failures;
     last_draft_success = draft_successes;
diff --git a/koboldcpp.py b/koboldcpp.py
index 8ac5de215..7e834aac5 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -525,6 +525,7 @@ def init_library():
     handle.get_last_eval_time.restype = ctypes.c_float
     handle.get_last_process_time.restype = ctypes.c_float
     handle.get_last_token_count.restype = ctypes.c_int
+    handle.get_last_input_count.restype = ctypes.c_int
     handle.get_last_seed.restype = ctypes.c_int
     handle.get_last_draft_success.restype = ctypes.c_int
     handle.get_last_draft_failed.restype = ctypes.c_int
@@ -3017,6 +3018,7 @@ Change Mode
             lastp = handle.get_last_process_time()
             laste = handle.get_last_eval_time()
             lastc = handle.get_last_token_count()
+            lastic = handle.get_last_input_count()
             totalgens = handle.get_total_gens()
             totalimggens = handle.get_total_img_gens()
             totalttsgens = handle.get_total_tts_gens()
@@ -3025,10 +3027,39 @@ Change Mode
             lastseed = handle.get_last_seed()
             lastdraftsuccess = handle.get_last_draft_success()
             lastdraftfailed = handle.get_last_draft_failed()
+            t_pp = float(lastp)*float(lastic)*0.001
+            t_gen = float(laste)*float(lastc)*0.001
+            s_pp = float(lastic)/t_pp if t_pp>0 else 0
+            s_gen = float(lastc)/t_gen if t_gen>0 else 0
             uptime = time.time() - start_time
             idletime = time.time() - last_req_time
             is_quiet = True if (args.quiet and args.debugmode != 1) else False
-            response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "last_seed":lastseed, "last_draft_success":lastdraftsuccess, "last_draft_failed":lastdraftfailed, "total_gens":totalgens, "stop_reason":stopreason, "total_img_gens":totalimggens, "total_tts_gens":totalttsgens, "total_transcribe_gens":totaltranscribegens, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1), "hordeexitcounter":exitcounter, "uptime":uptime, "idletime":idletime, "quiet":is_quiet}).encode())
+            response_body = json.dumps(
+                {
+                    "last_process": lastp,
+                    "last_eval": laste,
+                    "last_token_count": lastc,
+                    "last_input_count": lastic,
+                    "last_process_time": t_pp,
+                    "last_eval_time": t_gen,
+                    "last_process_speed": s_pp,
+                    "last_eval_speed": s_gen,
+                    "last_seed": lastseed,
+                    "last_draft_success": lastdraftsuccess,
+                    "last_draft_failed": lastdraftfailed,
+                    "total_gens": totalgens,
+                    "stop_reason": stopreason,
+                    "total_img_gens": totalimggens,
+                    "total_tts_gens": totalttsgens,
+                    "total_transcribe_gens": totaltranscribegens,
+                    "queue": requestsinqueue,
+                    "idle": (0 if modelbusy.locked() else 1),
+                    "hordeexitcounter": exitcounter,
+                    "uptime": uptime,
+                    "idletime": idletime,
+                    "quiet": is_quiet,
+                }
+            ).encode()
 
         elif self.path.endswith('/api/extra/generate/check'):
             if not self.secure_endpoint():
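For reference, a minimal client-side sketch of how the new fields could be read back. The endpoint path (/api/extra/perf), host, and port below are assumptions, not part of the diff; the arithmetic simply mirrors the t_pp/s_pp calculation added above, treating last_process and last_eval as per-token times in milliseconds.

# Hypothetical client for the patched perf response; adjust the URL to your setup.
import json
import urllib.request

def print_perf(base_url="http://localhost:5001"):
    # Assumed endpoint; the diff only shows the handler body, not the route.
    with urllib.request.urlopen(f"{base_url}/api/extra/perf") as resp:
        perf = json.load(resp)
    # Same derivation as the server-side t_pp / s_pp: ms-per-token * tokens * 0.001 = seconds.
    t_pp = perf["last_process"] * perf["last_input_count"] * 0.001
    s_pp = perf["last_input_count"] / t_pp if t_pp > 0 else 0
    print(f"prompt processing: {perf['last_input_count']} tokens in {t_pp:.2f}s ({s_pp:.2f} T/s)")
    # Generation speed can also be taken directly from the new precomputed field.
    print(f"generation: {perf['last_token_count']} tokens at {perf['last_eval_speed']:.2f} T/s")

if __name__ == "__main__":
    print_perf()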