Show a warning when genamt is near or exceeds ctxsize (genamt >= ctxsize - 1); show tokens-per-second (T/s) values in the timing output

This commit is contained in:
Concedo 2024-01-31 18:51:42 +08:00
parent 71cc19e76d
commit 340fbbbb04
2 changed files with 5 additions and 3 deletions

View file

@ -2083,10 +2083,12 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
}
time2 = timer_check();
float pt1 = (time1*1000.0/(embd_inp.size()==0?1:embd_inp.size()));
float ts1 = (1000.0/pt1);
int realnpredict = kcpp_params->n_predict-stopper_unused_tokens;
float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
float ts2 = (1000.0/pt2);
float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
printf("\nContextLimit: %d/%d, Processing:%.2fs (%.1fms/T), Generation:%.2fs (%.1fms/T), Total:%.2fs (%.1fms/T = %.2fT/s)",current_context_tokens.size(),nctx, time1, pt1, time2, pt2, (time1 + time2), (1000.0f/tokens_per_second) , tokens_per_second);
printf("\nContextLimit: %d/%d, Processing:%.2fs (%.1fms/T = %.2fT/s), Generation:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.1fms/T = %.2fT/s)",current_context_tokens.size(),nctx, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), (1000.0f/tokens_per_second) , tokens_per_second);
fflush(stdout);
output.status = 1;
generation_finished = true;

View file

@ -334,9 +334,9 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
inputs.prompt = prompt.encode("UTF-8")
inputs.memory = memory.encode("UTF-8")
if max_length >= max_context_length:
if max_length >= (max_context_length-1):
max_length = max_context_length-1
print("\nWARNING: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be gone and your outputs will not be very coherent.")
print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
global showmaxctxwarning
if max_context_length > maxctx:
if showmaxctxwarning: