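Default trim_stop to true (previously false), which trims the stop sequence itself and any tokens generated after it from the output. This is potentially a breaking change for clients that relied on the old default and did not set trim_stop explicitly.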

2025-09-10 17:14:36 +00:00 · 2024-12-03 22:44:10 +08:00 · 2024-12-03 22:44:10 +08:00 · 52cc908f7f
commit 52cc908f7f
parent 7d11d2946c
3 changed files with 18 additions and 14 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -976,7 +976,7 @@ def generate(genparams, is_quiet=False, stream_flag=False):
    grammar = genparams.get('grammar', '')
    grammar_retain_state = genparams.get('grammar_retain_state', False)
    genkey = genparams.get('genkey', '')
-    trimstop = genparams.get('trim_stop', False)
+    trimstop = genparams.get('trim_stop', True)
    quiet = is_quiet
    dynatemp_range = genparams.get('dynatemp_range', 0.0)
    dynatemp_exponent = genparams.get('dynatemp_exponent', 1.0)
@@ -1723,7 +1723,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):

                if tokenStr!="" or streamDone:
                    sseq = genparams.get('stop_sequence', [])
-                    trimstop = genparams.get('trim_stop', False)
+                    trimstop = genparams.get('trim_stop', True)
                    if trimstop and not streamDone and string_contains_or_overlaps_sequence_substring(tokenStr,sseq):
                        tokenReserve += tokenStr
                        await asyncio.sleep(async_sleep_short) #if a stop sequence could trigger soon, do not send output