survived the storm, again

2025-09-14 19:09:45 +00:00 · 2025-01-16 22:25:18 +08:00 · 2025-01-16 22:25:18 +08:00 · 11cd7c7bb0
commit 11cd7c7bb0
parent 2a00ee8fa8 adc5dd92e8
38 changed files with 31785 additions and 26630 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -99,6 +99,12 @@ enum dimre_method {
    DIMRE_METHOD_MEAN,
 };

+enum common_conversation_mode { // tri-state setting stored in common_params::conversation_mode (the old `bool conversation` field is removed in this change)
+    COMMON_CONVERSATION_MODE_DISABLED = 0, // conversation mode off
+    COMMON_CONVERSATION_MODE_ENABLED  = 1, // conversation mode on
+    COMMON_CONVERSATION_MODE_AUTO     = 2, // default value of common_params::conversation_mode; NOTE(review): how AUTO gets resolved is not visible here - confirm at the use site
+};
+
 // sampling parameters
 struct common_params_sampling {
    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
@@ -273,7 +279,6 @@ struct common_params {
    bool special           = false; // enable special token output
    bool interactive       = false; // interactive mode
    bool interactive_first = false; // wait for user input immediately
-    bool conversation      = false; // conversation mode (does not print special tokens and suffix/prefix)
    bool prompt_cache_all  = false; // save user input and generations to prompt cache
    bool prompt_cache_ro   = false; // open the prompt cache read-only and do not update it

@@ -299,6 +304,8 @@ struct common_params {
    ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
    ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V

+    common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
+
    // multimodal models (see examples/llava)
    std::string mmproj = "";        // path to multimodal projector                                         // NOLINT
    std::vector<std::string> image; // path to image file(s)
@@ -452,6 +459,11 @@ static bool string_starts_with(const std::string & str,
    return str.rfind(prefix, 0) == 0;
 }

+static bool string_ends_with(const std::string & str, // returns true iff `str` ends with `suffix` (an empty suffix always matches)
+                               const std::string & suffix) {  // While we wait for C++20's std::string::ends_with...
+    return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0; // length guard runs first so the unsigned str.size()-suffix.size() cannot wrap around
+}
+
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);

@@ -499,6 +511,9 @@ struct llama_model * common_load_model_from_hf(
    const std::string & local_path,
    const std::string & hf_token,
    const struct llama_model_params & params);
+std::pair<std::string, std::string> common_get_hf_file( // NOTE(review): the meaning of the two returned strings is not visible in this header - document it at the definition
+    const std::string & hf_repo_with_tag, // presumably a Hugging Face repo id with an optional tag - TODO confirm the accepted format
+    const std::string & hf_token); // presumably the Hugging Face access token, as in common_load_model_from_hf above - TODO confirm

 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);