Merge branch 'upstream' into concedo_experimental

# Conflicts: # .github/workflows/build.yml # README.md # common/common.cpp # examples/embedding/embedding.cpp # examples/json_schema_to_grammar.py # examples/llama.android/llama/src/main/cpp/llama-android.cpp # examples/llama.swiftui/README.md # examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj # examples/lookahead/lookahead.cpp # examples/parallel/parallel.cpp # examples/passkey/passkey.cpp # ggml/CMakeLists.txt # ggml/src/CMakeLists.txt # ggml/src/ggml-cpu/CMakeLists.txt # requirements.txt # requirements/requirements-all.txt # scripts/fetch_server_test_models.py # tests/test-chat.cpp # tests/test-json-schema-to-grammar.cpp
2025-09-15 03:19:41 +00:00 · 2025-03-06 18:54:58 +08:00 · 2025-03-06 18:54:58 +08:00 · ec43d2b147
commit ec43d2b147
parent 2fdef19d1b 42994048a3
52 changed files with 5600 additions and 946 deletions
--- a/common/common.h
+++ b/common/common.h
@ -106,9 +106,21 @@ enum common_conversation_mode {
    COMMON_CONVERSATION_MODE_AUTO     = 2,
 };

+enum common_grammar_trigger_type {
+    COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN,
+    COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
+    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
+    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
+};
+
 struct common_grammar_trigger {
-    std::string word;
-    bool at_start;
+    common_grammar_trigger_type type;
+    std::string value;
+    llama_token token = LLAMA_TOKEN_NULL;
+
+    // T can only be nlohmann::ordered_json
+    template <class T> T to_json() const;
+    template <class T> static common_grammar_trigger from_json(const T & in);
 };

 // sampling parameters
@ -159,8 +171,7 @@ struct common_params_sampling {

    std::string                         grammar; // optional BNF-like grammar to constrain sampling
    bool                                grammar_lazy = false;
-    std::vector<common_grammar_trigger> grammar_trigger_words;  // optional trigger words to trigger lazy grammar
-    std::vector<llama_token>            grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens.
+    std::vector<common_grammar_trigger> grammar_triggers; // optional triggers (for lazy grammars)
    std::set<llama_token>               preserved_tokens;

    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
@ -324,6 +335,8 @@ struct common_params {
    bool warmup            = true;  // warmup run
    bool check_tensors     = false; // validate tensor data

+    bool single_turn       = false; // single turn chat conversation
+
    ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
    ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V

@ -452,6 +465,8 @@ std::string string_repeat(const std::string & str, size_t n);

 void string_replace_all(std::string & s, const std::string & search, const std::string & replace);

+std::string regex_escape(const std::string & s);
+
 template<class T>
 static std::vector<T> string_split(const std::string & str, char delim) {
    static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");