Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	Package.swift
#	flake.lock
#	tests/test-chat-template.cpp
2025-09-15 11:29:43 +00:00 · 2024-04-15 21:39:40 +08:00 · 2024-04-15 21:39:40 +08:00 · 02892c5cbd
commit 02892c5cbd
parent eb380b500f 132f55795e
4 changed files with 2762 additions and 2793 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -13378,6 +13378,11 @@ struct llama_beam_search_data {
            }
            llama_logit_info logit_info(ctx);
            std::vector<llama_token_data> next_tokens = logit_info.top_k(n_beams);
+
+            // Clear the kv slot so that other beams may try different tokens at this position. The llama_decode()
+            // call in loop() will conclusively fill in the kv slot once the beams converge at this position.
+            llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
+
            size_t i=0;
            if (next_beams.size() < n_beams) {
                for (; next_beams.size() < n_beams ; ++i) {
@@ -15778,6 +15783,8 @@ size_t llama_state_set_data(struct llama_context * ctx, const uint8_t * src) {
                GGML_ASSERT((uint32_t) id < ctx->cparams.n_batch);
                ctx->output_ids[id] = i;
            }
+
+            ctx->n_outputs = n_outputs;
        }
    }

@@ -16946,6 +16953,21 @@ static int32_t llama_chat_apply_template_internal(
        if (add_ass) {
            ss << "### Response:\n";
        }
+    } else if (tmpl == "command-r" || (tmpl.find("<|START_OF_TURN_TOKEN|>") != std::string::npos && tmpl.find("<|USER_TOKEN|>") != std::string::npos)) {
+        // CohereForAI/c4ai-command-r-plus
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
+            } else if (role == "user") {
+                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
+            } else if (role == "assistant") {
+                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
+            }
+        }
+        if (add_ass) {
+            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
+        }
    } else {
        // template not supported
        return -1;