Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	CMakeLists.txt
#	Makefile
#	examples/embedding/embedding.cpp
#	examples/imatrix/imatrix.cpp
#	examples/llama-bench/llama-bench.cpp
#	examples/llava/MobileVLM-README.md
#	examples/parallel/parallel.cpp
#	examples/perplexity/perplexity.cpp
#	examples/quantize/CMakeLists.txt
#	examples/server/README.md
#	examples/speculative/speculative.cpp
#	tests/test-backend-ops.cpp
2025-09-10 09:04:36 +00:00 · 2024-09-13 16:17:24 +08:00 · 2024-09-13 16:17:24 +08:00 · e44ddf26ef
commit e44ddf26ef
parent 0fd85c3940 0abc6a2c25
47 changed files with 117978 additions and 117646 deletions
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -2388,7 +2388,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)

    if(debugmode==1 && file_format == FileFormat::GGUF_GENERIC)
    {
-        llama_perf_reset(llama_ctx_v4, LLAMA_PERF_TYPE_CONTEXT);
+        llama_perf_context_reset(llama_ctx_v4);
    }

    generation_finished = false; // Set current generation status
@@ -3317,7 +3317,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
    if(debugmode==1 && file_format == FileFormat::GGUF_GENERIC)
    {
        printf("\n");
-        llama_perf_print(llama_ctx_v4, LLAMA_PERF_TYPE_CONTEXT);
+        llama_perf_context_print(llama_ctx_v4);
    }

    time2 = timer_check();