Merge branch 'upstream' into concedo_experimental

2025-09-11 01:24:36 +00:00 · 2025-03-05 00:22:06 +08:00 · 2025-03-05 00:22:06 +08:00 · 0cddbe1f0b
commit 0cddbe1f0b
parent 2a22a9eec6 1a24c4621f
30 changed files with 2561 additions and 1226 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -32,8 +32,6 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif

-static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";
-
 static llama_context           ** g_ctx;
 static llama_model             ** g_model;
 static common_sampler          ** g_smpl;
@ -220,6 +218,10 @@ int main(int argc, char ** argv) {
    // print chat template example in conversation mode
    if (params.conversation_mode) {
        if (params.enable_chat_template) {
+            if (!params.prompt.empty()) {
+                LOG_WRN("*** User-specified prompt in conversation mode will be ignored, did you mean to set --system-prompt (-sys) instead?\n");
+            }
+
            LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.get(), params.use_jinja).c_str());
        } else {
            LOG_INF("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__);
@ -264,6 +266,7 @@ int main(int argc, char ** argv) {

    std::vector<llama_token> embd_inp;

+    bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
    auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
        common_chat_msg new_msg;
        new_msg.role = role;
@ -275,11 +278,20 @@ int main(int argc, char ** argv) {
    };

    {
-        auto prompt = (params.conversation_mode && params.enable_chat_template)
-            // format the system prompt in conversation mode (fallback to default if empty)
-            ? chat_add_and_format("system", params.prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.prompt)
+        std::string prompt;
+
+        if (params.conversation_mode && params.enable_chat_template) {
+            // format the system prompt in conversation mode (will use template default if empty)
+            prompt = params.system_prompt;
+
+            if (!prompt.empty()) {
+                prompt = chat_add_and_format("system", prompt);
+            }
+        } else {
            // otherwise use the prompt as is
-            : params.prompt;
+            prompt = params.prompt;
+        }
+
        if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
            LOG_DBG("tokenize the prompt\n");
            embd_inp = common_tokenize(ctx, prompt, true, true);
@ -293,7 +305,7 @@ int main(int argc, char ** argv) {
    }

    // Should not run without any tokens
-    if (embd_inp.empty()) {
+    if (!params.conversation_mode && embd_inp.empty()) {
        if (add_bos) {
            embd_inp.push_back(llama_vocab_bos(vocab));
            LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@ -477,8 +489,8 @@ int main(int argc, char ** argv) {
        LOG_INF(       " - Press Ctrl+C to interject at any time.\n");
 #endif
        LOG_INF(       "%s", control_message);
-        if (params.conversation_mode && params.enable_chat_template && params.prompt.empty()) {
-            LOG_INF(   " - Using default system message. To change it, set a different value via -p PROMPT or -f FILE argument.\n");
+        if (params.conversation_mode && params.enable_chat_template && params.system_prompt.empty()) {
+            LOG_INF(   " - Not using system message. To change it, set a different value via -sys PROMPT\n");
        }
        LOG_INF("\n");

@ -774,7 +786,7 @@ int main(int argc, char ** argv) {
            }

            // deal with end of generation tokens in interactive mode
-            if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
+            if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
                LOG_DBG("found an EOG token\n");

                if (params.interactive) {
@ -794,12 +806,12 @@ int main(int argc, char ** argv) {
            }

            // if current token is not EOG, we add it to current assistant message
-            if (params.conversation_mode) {
+            if (params.conversation_mode && !waiting_for_first_input) {
                const auto id = common_sampler_last(smpl);
                assistant_ss << common_token_to_piece(ctx, id, false);
            }

-            if (n_past > 0 && is_interacting) {
+            if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
                LOG_DBG("waiting for user input\n");

                if (params.conversation_mode) {
@ -889,11 +901,12 @@ int main(int argc, char ** argv) {
                input_echo = false; // do not echo this again
            }

-            if (n_past > 0) {
+            if (n_past > 0 || waiting_for_first_input) {
                if (is_interacting) {
                    common_sampler_reset(smpl);
                }
                is_interacting = false;
+                waiting_for_first_input = false;
            }
        }