Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 17:14:36 +00:00)
show warning if genamt >= ctxsize, show t/s values

parent 71cc19e76d
commit 340fbbbb04

2 changed files with 5 additions and 3 deletions
@@ -2083,10 +2083,12 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     }
     time2 = timer_check();
     float pt1 = (time1*1000.0/(embd_inp.size()==0?1:embd_inp.size()));
+    float ts1 = (1000.0/pt1);
     int realnpredict = kcpp_params->n_predict-stopper_unused_tokens;
     float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
+    float ts2 = (1000.0/pt2);
     float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
-    printf("\nContextLimit: %d/%d, Processing:%.2fs (%.1fms/T), Generation:%.2fs (%.1fms/T), Total:%.2fs (%.1fms/T = %.2fT/s)",current_context_tokens.size(),nctx, time1, pt1, time2, pt2, (time1 + time2), (1000.0f/tokens_per_second) , tokens_per_second);
+    printf("\nContextLimit: %d/%d, Processing:%.2fs (%.1fms/T = %.2fT/s), Generation:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.1fms/T = %.2fT/s)",current_context_tokens.size(),nctx, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), (1000.0f/tokens_per_second) , tokens_per_second);
     fflush(stdout);
     output.status = 1;
     generation_finished = true;
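The arithmetic behind the new ts1/ts2 values: pt1 and pt2 are milliseconds per token for the prompt-processing and generation phases, and the patch simply inverts them into tokens per second so both units appear in the log line. A minimal standalone sketch of that math (hypothetical phase_stats helper, not koboldcpp code; assumes the phase took a nonzero amount of time):

def phase_stats(seconds, tokens):
    # Return (ms per token, tokens per second) for one phase, guarding
    # against a zero token count the same way the C++ code does.
    pt = seconds * 1000.0 / (tokens if tokens != 0 else 1)  # ms/T
    ts = 1000.0 / pt                                        # T/s
    return pt, ts

# Example: 2.5s processing 100 prompt tokens, 6.0s generating 80 tokens.
pt1, ts1 = phase_stats(2.5, 100)   # 25.0 ms/T, 40.00 T/s
pt2, ts2 = phase_stats(6.0, 80)    # 75.0 ms/T, 13.33 T/s
print("Processing:%.2fs (%.1fms/T = %.2fT/s), Generation:%.2fs (%.1fms/T = %.2fT/s)"
      % (2.5, pt1, ts1, 6.0, pt2, ts2))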
@@ -334,9 +334,9 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
     outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
     inputs.prompt = prompt.encode("UTF-8")
     inputs.memory = memory.encode("UTF-8")
-    if max_length >= max_context_length:
+    if max_length >= (max_context_length-1):
        max_length = max_context_length-1
-        print("\nWARNING: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be gone and your outputs will not be very coherent.")
+        print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
     global showmaxctxwarning
     if max_context_length > maxctx:
         if showmaxctxwarning:
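The Python-side change tightens the warning condition: the old test only fired once max_length reached max_context_length, while the new >= (max_context_length-1) test also catches the boundary value the clamp would assign anyway, so the warning prints whenever generation is about to consume essentially the whole context. A self-contained sketch of the before/after behavior (hypothetical clamp_max_length helper, not the real generate()):

def clamp_max_length(max_length, max_context_length):
    # Clamp the requested generation amount so at least one token of
    # context survives, warning whenever the clamp is (nearly) hit.
    if max_length >= (max_context_length - 1):
        max_length = max_context_length - 1
        print("Warning: max_length near or exceeding max_context_length.")
    return max_length

print(clamp_max_length(600, 512))  # 511, warns (old >= test also caught this)
print(clamp_max_length(511, 512))  # 511, warns (only the new test catches this)
print(clamp_max_length(256, 512))  # 256, no warning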