Merge branch 'master' into concedo_experimental

# Conflicts:
#	.devops/nix/package.nix
#	CMakeLists.txt
#	README.md
#	ggml-metal.m
#	ggml.c
Concedo 2024-01-08 14:18:49 +08:00
commit f04b6e7287
18 changed files with 195 additions and 191 deletions

llama.cpp

@@ -2191,7 +2191,11 @@ struct llama_model_loader {
type_max = type;
}
// LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
// TODO: make runtime configurable
#if 0
struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
#endif
}
switch (type_max) {
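
The #if 0 block introduced above keeps the per-tensor dump compile-time only; the TODO asks for a runtime switch. A minimal sketch of one way that could look, assuming a hypothetical LLAMA_DEBUG_TENSORS environment variable that is not part of llama.cpp:

    // Sketch only: LLAMA_DEBUG_TENSORS is an assumed name, not an existing llama.cpp option.
    #include <cstdlib>

    static bool llama_debug_tensors_enabled() {
        const char * env = std::getenv("LLAMA_DEBUG_TENSORS");
        return env != nullptr && env[0] != '\0' && env[0] != '0';
    }

    // inside the tensor loop of llama_model_loader, in place of the #if 0 guard:
    if (llama_debug_tensors_enabled()) {
        struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
        LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n",
            __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
    }

The same toggle could instead be wired to a command-line flag; the environment variable is only the smallest possible example.
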
@@ -4801,7 +4805,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -4925,7 +4928,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * pos;
@@ -5024,9 +5026,7 @@ struct llm_build_context {
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
const int64_t n_rot = n_embd_head_k / 2;
@@ -5238,9 +5238,7 @@ struct llm_build_context {
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5333,7 +5331,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5429,7 +5426,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5756,7 +5752,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * attn_norm_output;
@@ -5980,7 +5975,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * pos;
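
The remaining hunks each drop the check GGML_ASSERT(n_embd_gqa == n_embd); from the per-architecture graph builders in llm_build_context. That equality only holds when every query head has its own KV head; with grouped-query attention the KV projection is narrower than the embedding, so the check cannot be kept for GQA-capable builders. A rough illustration with made-up head counts (not taken from any model in this commit):

    // Illustration only: hypothetical dimensions, not from the commit.
    const int64_t n_embd_head_v = 128;                      // width of one value head
    const int64_t n_head        = 32;                       // query heads
    const int64_t n_head_kv     = 8;                        // KV heads (grouped-query attention)

    const int64_t n_embd     = n_embd_head_v * n_head;      // 4096
    const int64_t n_embd_gqa = n_embd_head_v * n_head_kv;   // 1024

    // GGML_ASSERT(n_embd_gqa == n_embd) would fail here (1024 != 4096),
    // even though such a model is perfectly valid.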