mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-17 04:19:40 +00:00
warning for max tokens being too high
This commit is contained in:
parent
669311365c
commit
7f1003be44
1 changed file with 8 additions and 5 deletions
13
koboldcpp.py
13
koboldcpp.py
|
@@ -1228,13 +1228,16 @@ def generate(genparams, stream_flag=False):
|
||||||
global showmaxctxwarning
|
global showmaxctxwarning
|
||||||
if max_context_length > maxctx:
|
if max_context_length > maxctx:
|
||||||
if showmaxctxwarning:
|
if showmaxctxwarning:
|
||||||
print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)")
|
print(f"\n!!! ====== !!!\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid issues. This message will only show once per session.)\n!!! ====== !!!")
|
||||||
showmaxctxwarning = False
|
showmaxctxwarning = False
|
||||||
max_context_length = maxctx
|
max_context_length = maxctx
|
||||||
min_remain = min(max_context_length-4, 16)
|
min_remain_hardlimit = max(min(max_context_length-4, 16),int(max_context_length*0.1))
|
||||||
if max_length >= (max_context_length-min_remain):
|
min_remain_softlimit = max(min(max_context_length-4, 16),int(max_context_length*0.4))
|
||||||
max_length = max_context_length-min_remain
|
if max_length >= (max_context_length-min_remain_softlimit):
|
||||||
print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
|
print(f"\n!!! ====== !!!\nWarning: You are trying to generate text with max_length ({max_length}) near or exceeding max_context_length limit ({max_context_length}).\nMost of the context will be removed, and your outputs will not be very coherent.\nConsider launching with increased --contextsize to avoid issues.\n!!! ====== !!!")
|
||||||
|
if max_length >= (max_context_length-min_remain_hardlimit):
|
||||||
|
max_length = max_context_length-min_remain_hardlimit
|
||||||
|
|
||||||
|
|
||||||
inputs.max_context_length = max_context_length # this will resize the context buffer if changed
|
inputs.max_context_length = max_context_length # this will resize the context buffer if changed
|
||||||
inputs.max_length = max_length
|
inputs.max_length = max_length
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue