Merge branch 'master' into concedo_experimental

# Conflicts:
#	Makefile
#	ggml-cuda.cu
#	tests/test-tokenizer-0-falcon.py
#	tests/test-tokenizer-0-llama.py
2025-09-11 01:24:36 +00:00 · 2023-11-18 11:10:45 +08:00 · 2023-11-18 11:10:45 +08:00 · 6bf8ee4aea
commit 6bf8ee4aea
parent 35ce2b054f bbecf3f415
29 changed files with 448 additions and 113 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1073,6 +1073,12 @@ std::string llama_detokenize_bpe(llama_context * ctx, const std::vector<llama_to
    return result;
 }

+// Whether a BOS token should be added for `model`: a llama_add_bos_token() result other than -1 decides; on -1, only SPM vocabularies get one.
+bool llama_should_add_bos_token(const llama_model * model) {
+    const int bos_setting = llama_add_bos_token(model);
+    return bos_setting == -1 ? llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM : bos_setting != 0;
+}
+
 //
 // YAML utils
 //
@@ -1189,6 +1195,7 @@ void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const cha
    if (!data_str.empty() && (std::isspace(data_str[0]) || std::isspace(data_str.back()))) {
        data_str = std::regex_replace(data_str, std::regex("\n"), "\\n");
        data_str = std::regex_replace(data_str, std::regex("\""), "\\\"");
+        data_str = std::regex_replace(data_str, std::regex(R"(\\[^n"])"), R"(\$&)");
        data_str = "\"" + data_str + "\"";
        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
        return;