Merge branch 'upstream' into concedo_experimental

# Conflicts: # .github/workflows/build.yml # .github/workflows/server.yml # CMakeLists.txt # Makefile # README.md # ci/run.sh # common/CMakeLists.txt # common/common.cpp # docs/backend/SYCL.md # examples/embedding/embedding.cpp # examples/imatrix/imatrix.cpp # examples/infill/infill.cpp # examples/llama-bench/llama-bench.cpp # examples/main/README.md # examples/parallel/parallel.cpp # examples/perplexity/perplexity.cpp # examples/server/CMakeLists.txt # examples/server/README.md # examples/server/bench/README.md # examples/server/tests/README.md # examples/speculative/speculative.cpp # flake.lock # ggml/CMakeLists.txt # ggml/src/CMakeLists.txt # grammars/README.md # scripts/compare-commits.sh # scripts/compare-llama-bench.py # tests/CMakeLists.txt
2025-09-12 01:54:37 +00:00 · 2024-09-19 14:53:57 +08:00 · 2024-09-19 14:53:57 +08:00 · 29625c3d2e
commit 29625c3d2e
parent 0b0e456202 64c6af3195
54 changed files with 3396 additions and 2709 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -441,6 +441,7 @@ extern "C" {
    LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
    LLAMA_API int32_t llama_n_embd     (const struct llama_model * model);
    LLAMA_API int32_t llama_n_layer    (const struct llama_model * model);
+    LLAMA_API int32_t llama_n_head     (const struct llama_model * model);

    LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);