Fixes to stopper tokens, fixed BLAS mode for GPT2 and GPTJ, updated Kobold Lite

Concedo 2023-04-16 21:54:18 +08:00
parent 6548d3b3fb
commit c757fbee1d
6 changed files with 17 additions and 14 deletions

@@ -240,13 +240,13 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
 // decrement remaining sampling budget
 --remaining_tokens;
 //printf("\nid:%d word:%s\n",id,llama_token_to_str(ctx, id));
-concat_output += llama_token_to_str(ctx, id);
+concat_output += llama_token_to_str(ctx, id);
 for (const auto &matched : stop_sequence)
 {
     if (concat_output.find(matched) != std::string::npos)
     {
         remaining_tokens = 0;
-        printf("\n(Stop sequence triggered)");
+        printf("\n(Stop sequence triggered: %s)",matched.c_str());
         break;
     }
 }
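
The changed lines accumulate each decoded token into concat_output and scan that buffer for any registered stop sequence; the fix also makes the log line report which sequence actually fired. Below is a minimal standalone C++ sketch of that check. The variable names mirror the diff, but the stop strings, the fake token stream, and the main() harness are illustrative assumptions, not part of the commit.

// Minimal standalone sketch of the stop-sequence check shown in the diff above.
// concat_output, stop_sequence and remaining_tokens mirror the names in
// llama_generate; the streamed token strings below are illustrative only.
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> stop_sequence = {"\nUser:", "</s>"};   // hypothetical stop strings
    std::string concat_output;
    int remaining_tokens = 8;                                       // sampling budget
    // Pretend these strings are tokens streamed from the model.
    std::vector<std::string> streamed = {"Hello", " there", ".", "\nUser:", " ignored"};

    for (const auto &tok : streamed)
    {
        if (remaining_tokens <= 0)
            break;
        --remaining_tokens;
        concat_output += tok;   // same accumulation as concat_output += llama_token_to_str(ctx, id)

        for (const auto &matched : stop_sequence)
        {
            // Substring search over everything generated so far; once any stop
            // sequence appears, zero the budget and report which one fired.
            if (concat_output.find(matched) != std::string::npos)
            {
                remaining_tokens = 0;
                printf("\n(Stop sequence triggered: %s)", matched.c_str());
                break;
            }
        }
    }
    printf("\nFinal output: %s\n", concat_output.c_str());
    return 0;
}

In this sketch the stop sequence zeroes the budget as soon as it appears in the accumulated output, so the trailing " ignored" token is never appended; printing the matched string is useful when several stop sequences are registered and it is unclear which one ended generation.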