From 26f1df5e5f970c4694e543a8c6e9963ddca29e55 Mon Sep 17 00:00:00 2001
From: Llama <34464159+pi6am@users.noreply.github.com>
Date: Mon, 29 Jul 2024 05:16:47 -0700
Subject: [PATCH] Fix the penultimate token sometimes being lost with SSE
 streaming (#1031)

The token immediately before an eot token was lost when SSE streaming
was enabled if that token was contained entirely within a stop sequence.

As an example of when this could happen, consider this prompt:
Type the phrase 'pleas' once.

In a Llama 3-derived model, 'pleas' tokenizes as 'ple' 'as'. The token
'as' is contained within this instruct mode stop sequence:
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
due to the word 'assistant'. Since `string_contains_sequence_substring`
returns True for 'as', this token is added to `tokenReserve` instead of
being streamed immediately. If the '<|eot_id|>' token was generated
next, the text in `tokenReserve` would be discarded.
---
 koboldcpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index e3c821f1d..a3b121db2 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1447,7 +1447,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                     tokenReserve += tokenStr
                     await asyncio.sleep(async_sleep_short) #if a stop sequence could trigger soon, do not send output
                 else:
-                    if tokenStr!="":
+                    if tokenStr!="" or tokenReserve!="":
                         tokenStr = tokenReserve + tokenStr
                         tokenReserve = ""