Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/full-cuda.Dockerfile
#	.devops/llama-cli-cuda.Dockerfile
#	.devops/llama-server-cuda.Dockerfile
#	.devops/llama-server-intel.Dockerfile
#	.devops/llama-server-rocm.Dockerfile
#	.devops/llama-server-vulkan.Dockerfile
#	.devops/llama-server.Dockerfile
#	.github/workflows/docker.yml
#	docs/docker.md
#	examples/llama-bench/llama-bench.cpp
#	flake.lock
#	ggml/include/ggml.h
#	ggml/src/CMakeLists.txt
#	scripts/sync-ggml.last
#	src/llama.cpp
#	tests/test-backend-ops.cpp
#	tests/test-grad0.cpp
#	tests/test-rope.cpp
2025-09-11 09:34:37 +00:00 · 2024-08-30 10:37:39 +08:00 · 2024-08-30 10:37:39 +08:00 · d220495dd4
commit d220495dd4
parent 0f9968ef64 42c76d1358
42 changed files with 100585 additions and 99448 deletions
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -22,7 +22,7 @@
 #endif

 static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
-    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
+    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads, nullptr);

    if (plan.work_size > 0) {
        buf.resize(plan.work_size);
@@ -55,7 +55,7 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
 #define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor)

 struct benchmark_params_struct {
-    int32_t n_threads     = 1;
+    int     n_threads     = 1;
    int32_t n_iterations  = 10;
 };