From 08f10f69c38288e9e8bb1f933af63a3fc9013d40 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 12 Jan 2025 12:15:53 +0200 Subject: [PATCH 01/27] llama : remove notion of CLS token (#11064) ggml-ci --- gguf-py/gguf/constants.py | 2 -- gguf-py/gguf/gguf_writer.py | 3 --- include/llama.h | 5 ++++- src/llama-vocab.cpp | 26 ++++++++------------------ src/llama-vocab.h | 1 - 5 files changed, 12 insertions(+), 25 deletions(-) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 56aa9288d..8fe84df21 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -184,7 +184,6 @@ class Keys: UNK_ID = "tokenizer.ggml.unknown_token_id" SEP_ID = "tokenizer.ggml.seperator_token_id" PAD_ID = "tokenizer.ggml.padding_token_id" - CLS_ID = "tokenizer.ggml.cls_token_id" MASK_ID = "tokenizer.ggml.mask_token_id" ADD_BOS = "tokenizer.ggml.add_bos_token" ADD_EOS = "tokenizer.ggml.add_eos_token" @@ -1837,7 +1836,6 @@ KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID -KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index bf851c92c..080d2b9dc 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -857,9 +857,6 @@ class GGUFWriter: def add_pad_token_id(self, id: int) -> None: self.add_uint32(Keys.Tokenizer.PAD_ID, id) - def add_cls_token_id(self, id: int) -> None: - self.add_uint32(Keys.Tokenizer.CLS_ID, id) - def add_mask_token_id(self, id: int) -> None: self.add_uint32(Keys.Tokenizer.MASK_ID, id) diff --git a/include/llama.h b/include/llama.h index 9f04bc622..a184884c7 100644 --- a/include/llama.h +++ b/include/llama.h @@ -937,7 +937,6 @@ extern "C" { LLAMA_API llama_token llama_vocab_bos(const struct llama_vocab * vocab); // beginning-of-sentence LLAMA_API llama_token llama_vocab_eos(const struct llama_vocab * vocab); // end-of-sentence LLAMA_API llama_token llama_vocab_eot(const struct llama_vocab * vocab); // end-of-turn - LLAMA_API llama_token llama_vocab_cls(const struct llama_vocab * vocab); // classification LLAMA_API llama_token llama_vocab_sep(const struct llama_vocab * vocab); // sentence separator LLAMA_API llama_token llama_vocab_nl (const struct llama_vocab * vocab); // next-line LLAMA_API llama_token llama_vocab_pad(const struct llama_vocab * vocab); // padding @@ -973,6 +972,10 @@ extern "C" { DEPRECATED(LLAMA_API llama_token llama_token_fim_rep(const struct llama_vocab * vocab), "use llama_vocab_fim_rep instead"); DEPRECATED(LLAMA_API llama_token llama_token_fim_sep(const struct llama_vocab * vocab), "use llama_vocab_fim_sep instead"); + // CLS is equivalent to BOS + DEPRECATED(LLAMA_API llama_token llama_vocab_cls(const struct llama_vocab * vocab), // classification + "use llama_vocab_bos instead"); + // // Tokenization // diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index ed8751737..d0fb85cea 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -1218,7 +1218,6 @@ struct llama_vocab::impl { llama_token special_unk_id = 0; llama_token special_sep_id = LLAMA_TOKEN_NULL; llama_token special_pad_id = LLAMA_TOKEN_NULL; - llama_token special_cls_id = LLAMA_TOKEN_NULL; // TODO: revisit if this is really needed https://github.com/ggerganov/llama.cpp/pull/10930 llama_token special_mask_id = 
LLAMA_TOKEN_NULL; llama_token linefeed_id = 13; @@ -1352,7 +1351,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { special_unk_id = LLAMA_TOKEN_NULL; special_sep_id = LLAMA_TOKEN_NULL; special_pad_id = LLAMA_TOKEN_NULL; - special_cls_id = LLAMA_TOKEN_NULL; special_mask_id = LLAMA_TOKEN_NULL; linefeed_id = LLAMA_TOKEN_NULL; @@ -1374,18 +1372,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { special_unk_id = 0; special_sep_id = LLAMA_TOKEN_NULL; special_pad_id = LLAMA_TOKEN_NULL; - special_cls_id = LLAMA_TOKEN_NULL; special_mask_id = LLAMA_TOKEN_NULL; } else if (tokenizer_model == "bert") { type = LLAMA_VOCAB_TYPE_WPM; // default special tokens - special_bos_id = LLAMA_TOKEN_NULL; + special_bos_id = 101; special_eos_id = LLAMA_TOKEN_NULL; special_unk_id = 100; special_sep_id = 102; special_pad_id = 0; - special_cls_id = 101; special_mask_id = 103; } else if (tokenizer_model == "gpt2") { type = LLAMA_VOCAB_TYPE_BPE; @@ -1420,7 +1416,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { special_unk_id = LLAMA_TOKEN_NULL; special_sep_id = LLAMA_TOKEN_NULL; special_pad_id = LLAMA_TOKEN_NULL; - special_cls_id = LLAMA_TOKEN_NULL; special_mask_id = LLAMA_TOKEN_NULL; } else if (tokenizer_model == "t5") { type = LLAMA_VOCAB_TYPE_UGM; @@ -1431,7 +1426,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { special_unk_id = 2; special_sep_id = LLAMA_TOKEN_NULL; special_pad_id = 0; - special_cls_id = LLAMA_TOKEN_NULL; special_mask_id = LLAMA_TOKEN_NULL; const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str()); @@ -1712,7 +1706,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { { LLM_KV_TOKENIZER_UNK_ID, special_unk_id }, { LLM_KV_TOKENIZER_SEP_ID, special_sep_id }, { LLM_KV_TOKENIZER_PAD_ID, special_pad_id }, - { LLM_KV_TOKENIZER_CLS_ID, special_cls_id }, { LLM_KV_TOKENIZER_MASK_ID, special_mask_id }, { LLM_KV_TOKENIZER_FIM_PRE_ID, special_fim_pre_id }, { LLM_KV_TOKENIZER_FIM_SUF_ID, special_fim_suf_id }, @@ -2406,8 +2399,8 @@ std::vector llama_vocab::impl::tokenize( case LLAMA_VOCAB_TYPE_WPM: { if (add_special) { - GGML_ASSERT(special_cls_id != LLAMA_TOKEN_NULL); - output.push_back(special_cls_id); + GGML_ASSERT(special_bos_id != LLAMA_TOKEN_NULL); + output.push_back(special_bos_id); } llm_tokenizer_wpm_session session(vocab); @@ -2700,7 +2693,6 @@ void llama_vocab::impl::print_info() const { if (special_unk_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, special_unk_id, id_to_token[special_unk_id].text.c_str() ); } if (special_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, special_sep_id, id_to_token[special_sep_id].text.c_str() ); } if (special_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, special_pad_id, id_to_token[special_pad_id].text.c_str() ); } - if (special_cls_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: CLS token = %d '%s'\n", __func__, special_cls_id, id_to_token[special_cls_id].text.c_str() ); } if (special_mask_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, special_mask_id, id_to_token[special_mask_id].text.c_str() ); } if (linefeed_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, linefeed_id, id_to_token[linefeed_id].text.c_str() ); } @@ -2834,7 +2826,7 @@ llama_token_attr llama_vocab::token_get_attr(llama_token id) const { 
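// A hypothetical caller-side sketch (illustration only, not part of the patch): with
// CLS folded into BOS, code that previously prepended the classification token for
// WPM/BERT-style vocabs now uses the BOS token, which carries the old CLS id (101)
// for "bert" vocabs, while llama_vocab_cls() survives only as a deprecated alias:
//
//     llama_token bos = llama_vocab_bos(vocab); // was: llama_vocab_cls(vocab)
//     output.push_back(bos);                    // prepended when add_special is set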
} llama_token llama_vocab::token_bos() const { - return pimpl->type != LLAMA_VOCAB_TYPE_WPM ? pimpl->special_bos_id : pimpl->special_cls_id; + return pimpl->special_bos_id; } llama_token llama_vocab::token_eos() const { @@ -2853,10 +2845,6 @@ llama_token llama_vocab::token_unk() const { return pimpl->special_unk_id; } -llama_token llama_vocab::token_cls() const { - return pimpl->special_cls_id; -} - llama_token llama_vocab::token_sep() const { return pimpl->special_sep_id; } @@ -3069,8 +3057,9 @@ llama_token llama_vocab_eot(const struct llama_vocab * vocab) { return vocab->token_eot(); } +// deprecated llama_token llama_vocab_cls(const struct llama_vocab * vocab) { - return vocab->token_cls(); + return vocab->token_bos(); } llama_token llama_vocab_sep(const struct llama_vocab * vocab) { @@ -3159,7 +3148,8 @@ llama_token llama_token_eot(const struct llama_vocab * vocab) { // deprecated llama_token llama_token_cls(const struct llama_vocab * vocab) { - return llama_vocab_cls(vocab); + //return llama_vocab_cls(vocab); + return llama_vocab_bos(vocab); // avoid deprecation warning } // deprecated diff --git a/src/llama-vocab.h b/src/llama-vocab.h index 020f2b533..5ce355214 100644 --- a/src/llama-vocab.h +++ b/src/llama-vocab.h @@ -53,7 +53,6 @@ struct llama_vocab { llama_token token_eot() const; llama_token token_eom() const; llama_token token_unk() const; - llama_token token_cls() const; llama_token token_sep() const; llama_token token_nl () const; llama_token token_pad() const; From 9a483999a6fda350772aaf7bc541f1cb246f8a29 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Sun, 12 Jan 2025 13:45:14 +0100 Subject: [PATCH 02/27] llama : fix chat template gguf key (#11201) --- common/common.cpp | 11 ++--------- src/llama-arch.cpp | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 39bfb0c2e..1a2e15247 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1636,15 +1636,8 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto // std::string common_get_builtin_chat_template(const struct llama_model * model) { - static const char * template_key = "tokenizer.chat_template"; - // call with NULL buffer to get the total size of the string - int32_t res = llama_model_meta_val_str(model, template_key, NULL, 0); - if (res > 0) { - std::vector<char> model_template(res + 1, 0); - llama_model_meta_val_str(model, template_key, model_template.data(), model_template.size()); - return std::string(model_template.data(), model_template.size() - 1); - } - return ""; + const char * ptr_tmpl = llama_model_chat_template(model); + return ptr_tmpl == nullptr ? 
"" : ptr_tmpl; } bool common_chat_verify_template(const std::string & tmpl) { diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index 5c1f14cfd..d7d277e72 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -178,7 +178,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" }, { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" }, { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" }, - { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat.template" }, + { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" }, { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" }, { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" }, { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" }, From 924518e2e5726e81f3aeb2518fb85963a500e93a Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Sun, 12 Jan 2025 18:23:10 +0000 Subject: [PATCH 03/27] Reset color before we exit (#11205) We don't want colors to leak post termination of llama-run. Signed-off-by: Eric Curtin --- examples/run/run.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/run/run.cpp b/examples/run/run.cpp index bfa8378bb..0ad8bb15b 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -29,7 +29,7 @@ #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32) [[noreturn]] static void sigint_handler(int) { - printf("\n"); + printf("\n\033[0m"); exit(0); // not ideal, but it's the only way to guarantee exit in all cases } #endif From 1244cdcf14900dd199907b13f25d9c91a507f578 Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Mon, 13 Jan 2025 13:31:41 +0200 Subject: [PATCH 04/27] ggml : do not define GGML_USE_CUDA when building with GGML_BACKEND_DL (#11211) Build fails when using HIP and GGML_BACKEND_DL: ``` /usr/bin/ld: ../ggml/src/libggml.so: undefined reference to `ggml_backend_cuda_reg' collect2: error: ld returned 1 exit status ``` This patch fixes this. --- ggml/src/ggml-hip/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index b15fbd24d..d090ba9bd 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -70,7 +70,9 @@ ggml_add_backend_library(ggml-hip ) # TODO: do not use CUDA definitions for HIP -target_compile_definitions(ggml PUBLIC GGML_USE_CUDA) +if (NOT GGML_BACKEND_DL) + target_compile_definitions(ggml PUBLIC GGML_USE_CUDA) +endif() add_compile_definitions(GGML_USE_HIP) From 8f70fc3d1b1d3c17b61842330dd106d391cc1227 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 13 Jan 2025 13:38:20 +0100 Subject: [PATCH 05/27] llama : remove 'd' from bad special token log (#11212) This commit removes the 'd' from the log message in llama-vocab.cpp when logging a bad special token. 
The motivation for this is that currently the output can look something like the following: ```console load: bad special token: 'tokenizer.ggml.image_token_id' = 128256d, using default id -1 ``` --- src/llama-vocab.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index d0fb85cea..96b74e93a 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -1729,7 +1729,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { continue; } if (new_id >= id_to_token.size()) { - LLAMA_LOG_WARN("%s: bad special token: '%s' = %ud, using default id %d\n", + LLAMA_LOG_WARN("%s: bad special token: '%s' = %u, using default id %d\n", __func__, key.c_str(), new_id, id); } else { id = new_id; From 7426a26b2492fc546a4db6991e871ee605714093 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 13 Jan 2025 14:46:36 +0200 Subject: [PATCH 06/27] contrib : add naming guidelines (#11177) * contrib : add naming guidelines * contrib : expand naming guidelines [no ci] * contrib : cont [no ci] * contrib : add `_t` suffix guideline [no ci] * contrib : cont [no ci] * minor [no ci] * contrib : move coding guidelines to correct section [no ci] * contrib : minor reword coding guidelines [no ci] * contrib : add TODO for preprocessor directives [no ci] * contrib : expand [no ci] * minor [no ci] * contrib : clarify `_context` suffix usage [no ci] * contrib : filename guidelines [no ci] * contrib : fix notes [no ci] --- CONTRIBUTING.md | 102 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5a85ec5d2..a86f00ac6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,10 +1,10 @@ # Pull requests (for contributors) - Test your changes: - - Execute [the full CI locally on your machine](ci/README.md) before publishing - - Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`) - - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends) - - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops` + - Execute [the full CI locally on your machine](ci/README.md) before publishing + - Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`) + - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends) + - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops` - Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly - If your PR becomes stale, don't hesitate to ping the maintainers in the comments @@ -20,14 +20,104 @@ - Avoid adding third-party dependencies, extra files, extra headers, etc. - Always consider cross-compatibility with other operating systems and architectures - Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple -- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). 
Vertical alignment makes things more readable and easier to batch edit - Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a` -- Naming usually optimizes for common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963) +- Vertical alignment makes things more readable and easier to batch edit - Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a` +- Use sized integer types in the public API +- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo` + - In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary + ```cpp + // OK + llama_context * ctx; + const llama_rope_type rope_type; + + // not OK + struct llama_context * ctx; + const enum llama_rope_type rope_type; + ``` + + _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_ + +- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code +- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines) - Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices - Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$ ![matmul](media/matmul.png) +# Naming guidelines + +- Use `snake_case` for function, variable and type names +- Naming usually optimizes for longest common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963) + + ```cpp + // not OK + int small_number; + int big_number; + + // OK + int number_small; + int number_big; + ``` + +- Enum values are always in upper case and prefixed with the enum name + + ```cpp + enum llama_vocab_type { + LLAMA_VOCAB_TYPE_NONE = 0, + LLAMA_VOCAB_TYPE_SPM = 1, + LLAMA_VOCAB_TYPE_BPE = 2, + LLAMA_VOCAB_TYPE_WPM = 3, + LLAMA_VOCAB_TYPE_UGM = 4, + LLAMA_VOCAB_TYPE_RWKV = 5, + }; + ``` + +- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>` + + ```cpp + llama_model_init(); // class: "llama_model", method: "init" + llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove" + llama_sampler_get_seed(); // class: "llama_sampler", method: "get_seed" + llama_set_embeddings(); // class: "llama_context", method: "set_embeddings" + llama_n_threads(); // class: "llama_context", method: "n_threads" + llama_adapter_lora_free(); // class: "llama_adapter_lora", method: "free" + ``` + + - The `get` `<action>` can be omitted + - The `<noun>` can be omitted if not necessary + - The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed + - Use `init`/`free` for constructor/destructor `<method>` + +- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else + + ```cpp + typedef struct llama_context * llama_context_t; + + enum llama_pooling_type llama_pooling_type(const llama_context_t ctx); + ``` + + _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline)_ + +- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. 
Source files use the `.c` or `.cpp` extension +- Python filenames are all lowercase with underscores + +- _(TODO: abbreviations usage)_ + +# Preprocessor directives + +- (TODO: add guidelines with examples and apply them to the codebase) + + ```cpp + #ifdef FOO + #endif // FOO + ``` + +# Documentation + +- Documentation is a community effort +- When you need to look into the source code to figure out implementation details to figure out how to use an API consider adding a short summary to the header file for future reference +- When you notice incorrect or outdated documentation, please update it + # Resources The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects: From 00b4c3da6202e855087a4986bf19bb41b959e333 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 13 Jan 2025 13:56:23 +0100 Subject: [PATCH 07/27] common : support tag-based --hf-repo like on ollama (#11195) * common : support tag-based hf_repo like on ollama * fix build * various fixes * small fixes * fix style * fix windows build? * move common_get_hf_file to common.cpp * fix complain with noreturn --- common/arg.cpp | 33 ++++++++++----- common/common.cpp | 106 +++++++++++++++++++++++++++++++++++++++++++--- common/common.h | 8 ++++ 3 files changed, 130 insertions(+), 17 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 27886b84e..1457a360f 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -130,17 +130,26 @@ std::string common_arg::to_string() { static void common_params_handle_model_default( std::string & model, - std::string & model_url, + const std::string & model_url, std::string & hf_repo, - std::string & hf_file) { + std::string & hf_file, + const std::string & hf_token) { if (!hf_repo.empty()) { // short-hand to avoid specifying --hf-file -> default it to --model if (hf_file.empty()) { if (model.empty()) { - throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n"); + auto auto_detected = common_get_hf_file(hf_repo, hf_token); + if (auto_detected.first.empty() || auto_detected.second.empty()) { + exit(1); // built without CURL, error message already printed + } + hf_repo = auto_detected.first; + hf_file = auto_detected.second; + } else { + hf_file = model; } - hf_file = model; - } else if (model.empty()) { + } + // make sure model path is present (for caching purposes) + if (model.empty()) { // this is to avoid different repo having same file name, or same file name in different subdirs std::string filename = hf_repo + "_" + hf_file; // to make sure we don't have any slashes in the filename @@ -290,8 +299,8 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context } // TODO: refactor model params in a common struct - common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file); - common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file); + common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token); + common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token); if (params.escape) { string_process_escapes(params.prompt); @@ -1583,21 +1592,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } 
).set_env("LLAMA_ARG_MODEL_URL")); add_opt(common_arg( {"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]", "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n" "example: unsloth/phi-4-GGUF:q4_k_m\n" "(default: unused)", [](common_params & params, const std::string & value) { params.hf_repo = value; } ).set_env("LLAMA_ARG_HF_REPO")); add_opt(common_arg( {"-hff", "--hf-file"}, "FILE", "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)", [](common_params & params, const std::string & value) { params.hf_file = value; } ).set_env("LLAMA_ARG_HF_FILE")); add_opt(common_arg( {"-hfv", "-hfrv", "--hf-repo-v"}, "<user>/<model>[:quant]", "Hugging Face model repository for the vocoder model (default: unused)", [](common_params & params, const std::string & value) { params.vocoder.hf_repo = value; diff --git a/common/common.cpp b/common/common.cpp index 1a2e15247..a6f9252b2 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -73,6 +73,22 @@ #include #endif #define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 + +// +// CURL utils +// + +using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>; + +// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one +struct curl_slist_ptr { + struct curl_slist * ptr = nullptr; + ~curl_slist_ptr() { + if (ptr) { + curl_slist_free_all(ptr); + } + } +}; #endif // LLAMA_USE_CURL using json = nlohmann::ordered_json; @@ -1130,7 +1146,8 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) { // Initialize libcurl - std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup); + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; if (!curl) { LOG_ERR("%s: error initializing libcurl\n", __func__); return false; } @@ -1144,11 +1161,9 @@ static bool common_download_file(const std::string & url, const std::string & pa // Check if hf-token or bearer-token was specified if (!hf_token.empty()) { - std::string auth_header = "Authorization: Bearer "; - auth_header += hf_token.c_str(); - struct curl_slist *http_headers = NULL; - http_headers = curl_slist_append(http_headers, auth_header.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers); + std::string auth_header = "Authorization: Bearer " + hf_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); } #if defined(_WIN32) @@ -1444,6 +1459,80 @@ struct llama_model * common_load_model_from_hf( return common_load_model_from_url(model_url, local_path, hf_token, params); } +/** + * Allow getting the HF file from the HF repo with tag (like ollama), for example: + * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 + * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M + * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s + * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo) + * + * Return pair of <repo, file_name> (with "repo" already having tag removed) + * + * Note: we use the Ollama-compatible HF API, but not using the blobId. 
Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. + */ +std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) { + auto parts = string_split<std::string>(hf_repo_with_tag, ':'); + std::string tag = parts.size() > 1 ? parts.back() : "latest"; + std::string hf_repo = parts[0]; + if (string_split<std::string>(hf_repo, '/').size() != 2) { + throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n"); + } + + // fetch model info from Hugging Face Hub API + json model_info; + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; + std::string res_str; + std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag; + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); + auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t { + static_cast<std::string *>(data)->append((char * ) ptr, size * nmemb); + return size * nmemb; + }; + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str); +#if defined(_WIN32) + curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +#endif + if (!hf_token.empty()) { + std::string auth_header = "Authorization: Bearer " + hf_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + } + // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); + http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json"); + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); + + CURLcode res = curl_easy_perform(curl.get()); + + if (res != CURLE_OK) { + throw std::runtime_error("error: cannot make GET request to HF API"); + } + + long res_code; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code); + if (res_code == 200) { + model_info = json::parse(res_str); + } else if (res_code == 401) { + throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); + } else { + throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); + } + + // check response + if (!model_info.contains("ggufFile")) { + throw std::runtime_error("error: model does not have ggufFile"); + } + json & gguf_file = model_info.at("ggufFile"); + if (!gguf_file.contains("rfilename")) { + throw std::runtime_error("error: ggufFile does not have rfilename"); + } + + return std::make_pair(hf_repo, gguf_file.at("rfilename")); +} + #else struct llama_model * common_load_model_from_url( @@ -1465,6 +1554,11 @@ struct llama_model * common_load_model_from_hf( return nullptr; } +std::pair<std::string, std::string> common_get_hf_file(const std::string &, const std::string &) { + LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__); + return std::make_pair("", ""); +} + #endif // LLAMA_USE_CURL // diff --git a/common/common.h b/common/common.h index d523948b0..c86a4ef39 100644 --- a/common/common.h +++ b/common/common.h @@ -454,6 +454,11 @@ static bool string_starts_with(const std::string & str, return str.rfind(prefix, 0) == 
0; } +static bool string_ends_with(const std::string & str, + const std::string & suffix) { // While we wait for C++20's std::string::ends_with... + return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0; +} + bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides); void string_process_escapes(std::string & input); @@ -501,6 +506,9 @@ struct llama_model * common_load_model_from_hf( const std::string & local_path, const std::string & hf_token, const struct llama_model_params & params); +std::pair<std::string, std::string> common_get_hf_file( + const std::string & hf_repo_with_tag, + const std::string & hf_token); // clear LoRA adapters from context, then apply new list of adapters void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora); From ca001f6656c1c3d29ef479b3aa5d669453e63be5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 13 Jan 2025 15:08:44 +0200 Subject: [PATCH 08/27] contrib : add naming guidelines (cont) (#11177) --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a86f00ac6..dc58dbd51 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,7 +22,7 @@ - Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple - Vertical alignment makes things more readable and easier to batch edit - Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a` -- Use sized integer types in the public API +- Use sized integer types such as `int32_t` in the public API, e.g. `size_t` may also be appropriate for allocation sizes or byte offsets - Declare structs with `struct foo {}` instead of `typedef struct foo {} foo` - In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary ```cpp @@ -115,7 +115,7 @@ # Documentation - Documentation is a community effort - When you need to look into the source code to figure out implementation details to figure out how to use an API consider adding a short summary to the header file for future reference +- When you need to look into the source code to figure out how to use an API consider adding a short summary to the header file for future reference - When you notice incorrect or outdated documentation, please update it # Resources From 437e05f714cdd67757405221578d3f95f8228b63 Mon Sep 17 00:00:00 2001 From: ebraminio Date: Mon, 13 Jan 2025 17:16:39 +0330 Subject: [PATCH 09/27] server : (UI) Support for RTL text as models input or output (#11208) --- examples/server/public/index.html.gz | Bin 1206458 -> 1206472 bytes examples/server/webui/index.html | 5 +++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz index 3640a7a6cfa76764d93684e9051a32c263932c8a..489bff84b98f7bd7ebcdabeb7d0f9b31093eae58 100644 Binary files a/examples/server/public/index.html.gz and b/examples/server/public/index.html.gz differ
diff --git a/examples/server/webui/index.html b/examples/server/webui/index.html index 86a79b77f..a76d831a9 100644 --- a/examples/server/webui/index.html +++ b/examples/server/webui/index.html @@ -37,7 +37,7 @@
+ }" @click="setViewingConv(conv.id)" dir="auto"> {{ conv.messages[0].content }}
@@ -156,6 +156,7 @@ @keydown.enter.shift.exact.prevent="inputMsg += '\n'" :disabled="isGenerating" id="msg-input" + dir="auto" > @@ -244,7 +245,7 @@
+ }" dir="auto">
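A minimal caller-side sketch of the tag-based resolution added in [PATCH 07/27], assuming llama.cpp's `common` library is linked and built with `LLAMA_USE_CURL`; the repo and quant tag below are illustrative values only:

```cpp
#include <cstdio>
#include <exception>
#include <string>
#include <utility>

#include "common.h" // declares common_get_hf_file(), as added in this series

int main() {
    // "<user>/<model>[:quant]" - the tag is optional and defaults to "latest"
    // (Q4_K_M first, then the first GGUF file found in the repo).
    const std::string ref = "unsloth/phi-4-GGUF:q4_k_m"; // illustrative reference
    try {
        std::pair<std::string, std::string> rf = common_get_hf_file(ref, /*hf_token =*/ "");
        printf("repo: %s\n", rf.first.c_str());  // tag already stripped from the repo
        printf("file: %s\n", rf.second.c_str()); // resolved from the "ggufFile" field
    } catch (const std::exception & e) {
        fprintf(stderr, "%s\n", e.what()); // network/auth failures are thrown, per the patch
    }
    return 0;
}
```

Resolving the file name up front keeps the download cache keyed by a concrete `<repo>_<file>` path, which is why `common_params_handle_model_default` above derives the local model path from the resolved pair.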