diff --git a/koboldcpp.py b/koboldcpp.py index 0efa45b78..bb980c1c6 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1228,13 +1228,16 @@ def generate(genparams, stream_flag=False): global showmaxctxwarning if max_context_length > maxctx: if showmaxctxwarning: - print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)") + print(f"\n!!! ====== !!!\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid issues. This message will only show once per session.)\n!!! ====== !!!") showmaxctxwarning = False max_context_length = maxctx - min_remain = min(max_context_length-4, 16) - if max_length >= (max_context_length-min_remain): - max_length = max_context_length-min_remain - print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.") + min_remain_hardlimit = max(min(max_context_length-4, 16),int(max_context_length*0.1)) + min_remain_softlimit = max(min(max_context_length-4, 16),int(max_context_length*0.4)) + if max_length >= (max_context_length-min_remain_softlimit): + print(f"\n!!! ====== !!!\nWarning: You are trying to generate text with max_length ({max_length}) near or exceeding max_context_length limit ({max_context_length}).\nMost of the context will be removed, and your outputs will not be very coherent.\nConsider launching with increased --contextsize to avoid issues.\n!!! ====== !!!") + if max_length >= (max_context_length-min_remain_hardlimit): + max_length = max_context_length-min_remain_hardlimit + inputs.max_context_length = max_context_length # this will resize the context buffer if changed inputs.max_length = max_length