Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.gitignore
#	CONTRIBUTING.md
#	Makefile
#	examples/llava/CMakeLists.txt
#	scripts/sync-ggml-am.sh
#	scripts/sync-ggml.last
#	scripts/sync-ggml.sh
#	src/llama-vocab.cpp
2025-09-12 09:59:41 +00:00 · 2024-08-10 11:42:32 +08:00 · 2024-08-10 11:42:32 +08:00 · bdfe8526b8
commit bdfe8526b8
parent 6dd3d5515e b72942fac9
44 changed files with 2241 additions and 439 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -345,7 +345,7 @@ extern "C" {
        int32_t nthread;                     // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
        enum llama_ftype ftype;              // quantize to this llama_ftype
        enum ggml_type output_tensor_type;   // output tensor type
-        enum ggml_type token_embedding_type; // itoken embeddings tensor type
+        enum ggml_type token_embedding_type; // token embeddings tensor type
        bool allow_requantize;               // allow quantizing non-f32/f16 tensors
        bool quantize_output_tensor;         // quantize output.weight
        bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored