Merge commit '8dcc3662a2' into concedo_experimental

Keep changes from https://github.com/ggml-org/llama.cpp/pull/18096 without https://github.com/ggml-org/llama.cpp/pull/14904
The reason is to maintain compatibility with the 2023 build of w64devkit.

# Conflicts:
# .github/ISSUE_TEMPLATE/019-bug-misc.yml
# examples/model-conversion/scripts/causal/run-org-model.py
# examples/speculative/speculative.cpp
# ggml/src/ggml-cpu/arch-fallback.h
# ggml/src/ggml-cpu/repack.cpp
# ggml/src/ggml-cpu/repack.h
# ggml/src/ggml-hexagon/ggml-hexagon.cpp
# ggml/src/ggml-hexagon/htp/act-ops.c
# ggml/src/ggml-hexagon/htp/htp-msg.h
# ggml/src/ggml-hexagon/htp/hvx-utils.c
# ggml/src/ggml-hexagon/htp/hvx-utils.h
# ggml/src/ggml-hexagon/htp/main.c
This commit is contained in:
Concedo 2025-12-19 02:11:55 +08:00
commit e005fc2587
16 changed files with 712 additions and 87 deletions

View file

@@ -362,23 +362,39 @@ const char * llama_sampler_name(const struct llama_sampler * smpl) {
}
// Notify the sampler that `token` was accepted. A null sampler or a
// sampler whose interface lacks an accept callback is silently ignored.
void llama_sampler_accept(struct llama_sampler * smpl, llama_token token) {
    if (smpl && smpl->iface->accept) {
        smpl->iface->accept(smpl, token);
    }
}
// Run the sampler over the candidate array `cur_p`. A null sampler is a
// no-op; a non-null sampler is required to provide an apply callback
// (asserted), unlike the optional accept/reset callbacks.
void llama_sampler_apply(struct llama_sampler * smpl, struct llama_token_data_array * cur_p) {
    if (smpl) {
        GGML_ASSERT(smpl->iface->apply);
        smpl->iface->apply(smpl, cur_p);
    }
}
// Reset the sampler's internal state. A null sampler or a sampler whose
// interface lacks a reset callback is silently ignored.
void llama_sampler_reset(struct llama_sampler * smpl) {
    if (smpl && smpl->iface->reset) {
        smpl->iface->reset(smpl);
    }
}
struct llama_sampler * llama_sampler_clone(const struct llama_sampler * smpl) {
if (!smpl) {
return nullptr;
}
if (smpl->iface->clone) {
return smpl->iface->clone(smpl);
}

View file

@@ -566,6 +566,7 @@ static void llama_params_fit_impl(
} else {
assert(ngl_per_device_high[id].n_layer == n_unassigned);
ngl_per_device = ngl_per_device_high;
mem = mem_high;
LLAMA_LOG_DEBUG("%s: set ngl_per_device[%d].n_layer=%" PRIu32 "\n", __func__, id, ngl_per_device[id].n_layer);
}
}
@@ -653,6 +654,7 @@ static void llama_params_fit_impl(
}
} else {
ngl_per_device = ngl_per_device_high;
mem = mem_high;
id_dense_start = id_dense_start_high;
LLAMA_LOG_DEBUG("%s: set ngl_per_device[%zu].(n_layer, n_part)=(%" PRIu32 ", %" PRIu32 "), id_dense_start=%zu\n",
__func__, id, ngl_per_device[id].n_layer, ngl_per_device[id].n_part, id_dense_start);