Merge branch 'master' into concedo_experimental

# Conflicts:
#	Makefile
#	tests/test-tokenizer-0-falcon.py
#	tests/test-tokenizer-0-llama.py
Concedo committed 2023-11-20 22:37:06 +08:00
commit 56a5fa7a60
21 changed files with 275 additions and 422 deletions


@@ -492,6 +492,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             params.interactive_first = true;
         } else if (arg == "-ins" || arg == "--instruct") {
             params.instruct = true;
+        } else if (arg == "-cml" || arg == "--chatml") {
+            params.chatml = true;
         } else if (arg == "--infill") {
             params.infill = true;
         } else if (arg == "--multiline-input") {
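
The hunk above extends the parser's else-if chain with a new `-cml`/`--chatml` alias pair that sets a boolean on the params struct. A minimal standalone sketch of that pattern, assuming a stand-in `params_t` struct rather than the real `gpt_params`:

    #include <string>

    // Stand-in for the real gpt_params struct (assumption: only the two
    // booleans relevant to this hunk are modeled here).
    struct params_t {
        bool instruct = false;
        bool chatml   = false;
    };

    // Returns true if `arg` matched a known flag, mirroring the else-if
    // chain in gpt_params_parse_ex above.
    static bool parse_flag(const std::string & arg, params_t & params) {
        if (arg == "-ins" || arg == "--instruct") {
            params.instruct = true;
        } else if (arg == "-cml" || arg == "--chatml") {
            params.chatml = true;
        } else {
            return false; // not a flag handled by this sketch
        }
        return true;
    }

With this change, ChatML-formatted chat can be requested from the command line, e.g. `./main -m model.gguf -cml` (invocation shown for illustration only).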
@@ -731,6 +733,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("  -i, --interactive     run in interactive mode\n");
     printf("  --interactive-first   run in interactive mode and wait for input right away\n");
     printf("  -ins, --instruct      run in instruction mode (use with Alpaca models)\n");
+    printf("  -cml, --chatml        run in chatml mode (use with ChatML-compatible models)\n");
     printf("  --multiline-input     allows you to write or paste multiple lines without ending each in '\\'\n");
     printf("  -r PROMPT, --reverse-prompt PROMPT\n");
     printf("                        halt generation at PROMPT, return control in interactive mode\n");
@@ -932,7 +935,7 @@ void llama_batch_add(
     const std::vector<llama_seq_id> & seq_ids,
                                bool   logits) {
     batch.token   [batch.n_tokens] = id;
-    batch.pos     [batch.n_tokens] = pos,
+    batch.pos     [batch.n_tokens] = pos;
     batch.n_seq_id[batch.n_tokens] = seq_ids.size();
     for (size_t i = 0; i < seq_ids.size(); ++i) {
         batch.seq_id[batch.n_tokens][i] = seq_ids[i];
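
Worth noting about the one-character fix above: because assignment binds tighter than the comma operator, the old `= pos,` still executed both assignments correctly, so the comma was a typo rather than a functional bug; the semicolon simply makes the statement boundary explicit. A minimal standalone demonstration of that precedence rule:

    #include <cstdio>

    int main() {
        int a = 0, b = 0;
        // Parsed as (a = 1), (b = 2): both assignments run, left to right.
        a = 1, b = 2;
        std::printf("a=%d b=%d\n", a, b); // prints: a=1 b=2
        return 0;
    }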