diff --git a/expose.cpp b/expose.cpp
index 1a9b2437e..34320756b 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -278,6 +278,9 @@ extern "C"
int get_last_token_count() {
return last_token_count;
}
+ int get_last_input_count() {
+ return last_input_count;
+ }
int get_last_seed()
{
return last_seed;
diff --git a/expose.h b/expose.h
index 25527ed1c..5a56ba816 100644
--- a/expose.h
+++ b/expose.h
@@ -286,6 +286,7 @@ extern bool generation_finished;
extern float last_eval_time;
extern float last_process_time;
extern int last_token_count;
+extern int last_input_count;
extern int last_seed;
extern int total_gens;
extern int total_img_gens;
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index bc0437056..ad9d7f922 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -59,6 +59,7 @@ bool generation_finished;
float last_process_time = 0;
float last_eval_time = 0;
int last_token_count = 0;
+int last_input_count = 0;
int last_seed = -1;
int total_gens = 0;
int last_draft_success = 0;
@@ -4318,6 +4319,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
last_eval_time = pt2;
last_process_time = pt1;
last_token_count = realnpredict;
+ last_input_count = (finaltokcount<0?0:finaltokcount);
last_seed = kcpp_data->seed;
last_draft_failed = draft_failures;
last_draft_success = draft_successes;
diff --git a/koboldcpp.py b/koboldcpp.py
index 8ac5de215..7e834aac5 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -525,6 +525,7 @@ def init_library():
handle.get_last_eval_time.restype = ctypes.c_float
handle.get_last_process_time.restype = ctypes.c_float
handle.get_last_token_count.restype = ctypes.c_int
+ handle.get_last_input_count.restype = ctypes.c_int
handle.get_last_seed.restype = ctypes.c_int
handle.get_last_draft_success.restype = ctypes.c_int
handle.get_last_draft_failed.restype = ctypes.c_int
@@ -3017,6 +3018,7 @@ Change Mode
lastp = handle.get_last_process_time()
laste = handle.get_last_eval_time()
lastc = handle.get_last_token_count()
+ lastic = handle.get_last_input_count()
totalgens = handle.get_total_gens()
totalimggens = handle.get_total_img_gens()
totalttsgens = handle.get_total_tts_gens()
@@ -3025,10 +3027,39 @@ Change Mode
lastseed = handle.get_last_seed()
lastdraftsuccess = handle.get_last_draft_success()
lastdraftfailed = handle.get_last_draft_failed()
+ t_pp = float(lastp)*float(lastic)*0.001
+ t_gen = float(laste)*float(lastc)*0.001
+ s_pp = float(lastic)/t_pp if t_pp>0 else 0
+ s_gen = float(lastc)/t_gen if t_gen>0 else 0
uptime = time.time() - start_time
idletime = time.time() - last_req_time
is_quiet = True if (args.quiet and args.debugmode != 1) else False
- response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "last_seed":lastseed, "last_draft_success":lastdraftsuccess, "last_draft_failed":lastdraftfailed, "total_gens":totalgens, "stop_reason":stopreason, "total_img_gens":totalimggens, "total_tts_gens":totalttsgens, "total_transcribe_gens":totaltranscribegens, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1), "hordeexitcounter":exitcounter, "uptime":uptime, "idletime":idletime, "quiet":is_quiet}).encode())
+ response_body = json.dumps(
+ {
+ "last_process": lastp,
+ "last_eval": laste,
+ "last_token_count": lastc,
+ "last_input_count": lastic,
+ "last_process_time": t_pp,
+ "last_eval_time": t_gen,
+ "last_process_speed": s_pp,
+ "last_eval_speed": s_gen,
+ "last_seed": lastseed,
+ "last_draft_success": lastdraftsuccess,
+ "last_draft_failed": lastdraftfailed,
+ "total_gens": totalgens,
+ "stop_reason": stopreason,
+ "total_img_gens": totalimggens,
+ "total_tts_gens": totalttsgens,
+ "total_transcribe_gens": totaltranscribegens,
+ "queue": requestsinqueue,
+ "idle": (0 if modelbusy.locked() else 1),
+ "hordeexitcounter": exitcounter,
+ "uptime": uptime,
+ "idletime": idletime,
+ "quiet": is_quiet,
+ }
+ ).encode()
elif self.path.endswith('/api/extra/generate/check'):
if not self.secure_endpoint():
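
For reference, a minimal client-side sketch of how the new perf fields could be consumed, assuming the hunk above sits in the handler for KoboldCpp's /api/extra/perf endpoint and the server listens on the default port 5001; the endpoint path, port, and this script are illustrative assumptions, not shown in the diff.

import json
import urllib.request

# Hypothetical client: endpoint path and port are assumptions for illustration.
with urllib.request.urlopen("http://localhost:5001/api/extra/perf") as resp:
    perf = json.load(resp)

# last_process / last_eval appear to be per-token times in milliseconds (hence
# the *0.001 conversion in the patch); the new last_process_time / last_eval_time
# fields are totals in seconds, and the *_speed fields are tokens per second.
print(f"prompt: {perf['last_input_count']} tokens in "
      f"{perf['last_process_time']:.2f}s ({perf['last_process_speed']:.2f} T/s)")
print(f"gen:    {perf['last_token_count']} tokens in "
      f"{perf['last_eval_time']:.2f}s ({perf['last_eval_speed']:.2f} T/s)")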