still not really working right

2026-05-11 21:32:11 +00:00 · 2025-11-09 01:57:48 +08:00 · 2025-11-09 01:57:48 +08:00 · d6a2ad8455
commit d6a2ad8455
parent e6ca0aa8d0 eeee367de5
48 changed files with 3193 additions and 2705 deletions
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@ -121,6 +121,7 @@
 #include "models/wavtokenizer-dec.cpp"
 #include "models/xverse.cpp"
 #include "models/graph-context-mamba.cpp"
+#include "models/pangu-embedded.cpp"

 #if defined(GGML_USE_CLBLAST)
 #  include "ggml_v3b-opencl.h"
@ -376,8 +377,8 @@ static bool weight_buft_supported(const llama_hparams & hparams, ggml_tensor * w
            } break;
        case GGML_OP_IM2COL:
            {
-                const int n_embd = hparams.n_embd;
-                ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_embd, w->ne[1], 1, 1);
+                const int n_embd_inp = hparams.n_embd_inp();
+                ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_embd_inp, w->ne[1], 1, 1);
                op_tensor = ggml_im2col(ctx, w, b, 1, 0, 0, 0, 1, 0, false, GGML_TYPE_F16);
            } break;
        case GGML_OP_SCALE:
@ -1139,9 +1140,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                    case 64: type = LLM_TYPE_32B; break;
                    default: type = LLM_TYPE_UNKNOWN;
                }
-                // since vision model stacks deepstack features along feature dim
-                // we also create a fake "n_embd" for text model to be the main embd + deepstack embds
-                hparams.n_embd *= hparams.n_deepstack_layers + 1;
            } break;
        case LLM_ARCH_QWEN3MOE:
            {
@ -1165,9 +1163,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                    case 94: type = LLM_TYPE_235B_A22B; break;
                    default: type = LLM_TYPE_UNKNOWN;
                }
-                // since vision model stacks deepstack features along feature dim
-                // we also create a fake "n_embd" for text model to be the main embd + deepstack embds
-                hparams.n_embd *= hparams.n_deepstack_layers + 1;
            } break;
        case LLM_ARCH_PHI2:
            {
@ -3494,10 +3489,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
            case LLM_ARCH_QWEN3:
            case LLM_ARCH_QWEN3VL:
                {
-                    // for model loading, the weights only have the main embd
-                    // so we need to divide by the number of deepstack layers + 1
-                    // n_embd is const int so we declare a new variable
-                    int64_t n_embd = hparams.n_embd / (hparams.n_deepstack_layers + 1);
                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);

                    // output
@ -3533,10 +3524,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
            case LLM_ARCH_QWEN3MOE:
            case LLM_ARCH_QWEN3VLMOE:
                {
-                    // for model loading, the weights only have the main embd
-                    // so we need to divide by the number of deepstack layers + 1
-                    // n_embd is const int so we declare a new variable
-                    int64_t n_embd = hparams.n_embd / (hparams.n_deepstack_layers + 1);
                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);

                    // output
@ -6689,6 +6676,7 @@ void llama_model::print_info() const {
    if (!hparams.vocab_only) {
        LLAMA_LOG_INFO("%s: n_ctx_train      = %u\n",     __func__, hparams.n_ctx_train);
        LLAMA_LOG_INFO("%s: n_embd           = %u\n",     __func__, hparams.n_embd);
+        LLAMA_LOG_INFO("%s: n_embd_inp       = %u\n",     __func__, hparams.n_embd_inp());
        LLAMA_LOG_INFO("%s: n_layer          = %u\n",     __func__, hparams.n_layer);
        LLAMA_LOG_INFO("%s: n_head           = %s\n",     __func__, print_f([&](uint32_t il) { return hparams.n_head(il);    }, hparams.n_layer).c_str());
        LLAMA_LOG_INFO("%s: n_head_kv        = %s\n",     __func__, print_f([&](uint32_t il) { return hparams.n_head_kv(il); }, hparams.n_layer).c_str());
@ -7537,6 +7525,10 @@ int32_t llama_model_n_embd(const llama_model * model) {
    return model->hparams.n_embd;
 }

+int32_t llama_model_n_embd_inp(const llama_model * model) { // accessor added by this change: returns hparams.n_embd_inp() as int32_t
+    return model->hparams.n_embd_inp();
+}
+
 int32_t llama_model_n_layer(const llama_model * model) { // returns the model's hparams.n_layer as int32_t
    return model->hparams.n_layer;
 }