Merge branch 'master' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	.gitignore
#	CMakeLists.txt
#	Makefile
#	Package.swift
#	README.md
#	ggml-cuda.cu
#	llama.cpp
#	llama.h
#	scripts/sync-ggml.sh
#	tests/CMakeLists.txt
Commit ec21fa7712 by Concedo, 2023-12-08 17:42:26 +08:00
34 changed files with 5887 additions and 1435 deletions

@@ -205,8 +205,9 @@ int main(int argc, char ** argv) {
         const std::string token_str = llama_token_to_piece(ctx_tgt, id);
-        printf("%s", token_str.c_str());
-        fflush(stdout);
+        if (!params.use_color) {
+            printf("%s", token_str.c_str());
+        }
 
         if (id == llama_token_eos(model_tgt)) {
             has_eos = true;
@@ -238,10 +239,18 @@ int main(int argc, char ** argv) {
                 ++n_past_tgt;
                 ++n_past_dft;
                 ++i_dft;
+                if (params.use_color) {
+                    // Color token according to its origin sequence
+                    printf("\u001b[%dm%s\u001b[37m", (36 - s_keep % 6), token_str.c_str());
+                    fflush(stdout);
+                }
                 continue;
             }
         }
+        if (params.use_color) {
+            printf("%s", token_str.c_str());
+        }
         fflush(stdout);
 
         LOG("the sampled target token (%d, '%s') did not match, or we ran out of drafted tokens\n", id, token_str.c_str());