diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 9d011ff34..ff30a2ae7 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1131,10 +1131,6 @@ void llama_model_base::load_hparams(llama_model_loader & ml) {
         ml.get_key(LLM_KV_ROPE_DIMENSION_COUNT_SWA, hparams.n_rot_swa, false);
     }
 
-    // for differentiating model types
-    uint32_t n_vocab = 0;
-    ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);
-
     // for classifier models
     ml.get_arr(LLM_KV_CLASSIFIER_OUTPUT_LABELS, classifier_labels, false);
     if (!classifier_labels.empty()) {
diff --git a/src/models/deepseek2.cpp b/src/models/deepseek2.cpp
index 53574df46..1fe54adc1 100644
--- a/src/models/deepseek2.cpp
+++ b/src/models/deepseek2.cpp
@@ -1,7 +1,8 @@
 #include "models.h"
 
 void llama_model_deepseek2::load_arch_hparams(llama_model_loader & ml) {
-    const auto n_vocab = vocab.n_tokens();
+    uint32_t n_vocab = 0;
+    ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);
 
     // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B, Kanana-2-30B-A3B
     const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26 || (hparams.n_layer == 48 && n_vocab == 128256));
diff --git a/src/models/llama.cpp b/src/models/llama.cpp
index 10523117c..8ddb59368 100644
--- a/src/models/llama.cpp
+++ b/src/models/llama.cpp
@@ -1,7 +1,8 @@
 #include "models.h"
 
 void llama_model_llama::load_arch_hparams(llama_model_loader & ml) {
-    const auto n_vocab = vocab.n_tokens();
+    uint32_t n_vocab = 0;
+    ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);
 
     ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
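
Below the patch, a minimal self-contained sketch of the fallback-read idiom that the per-architecture loaders now use. The gguf_meta struct and the read_u32 / read_arr_n helpers are hypothetical stand-ins for llama_model_loader::get_key and get_arr_n, and the key strings are illustrative only; the point is the short-circuit ||, which derives n_vocab from the tokenizer token list only when the explicit vocab-size key is absent.

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for a model file's GGUF metadata: scalar keys and array keys.
    struct gguf_meta {
        std::map<std::string, uint32_t>                 scalars;
        std::map<std::string, std::vector<std::string>> arrays;
    };

    // Stand-in for llama_model_loader::get_key with required = false:
    // returns false instead of throwing when the key is missing.
    static bool read_u32(const gguf_meta & meta, const std::string & key, uint32_t & out) {
        auto it = meta.scalars.find(key);
        if (it == meta.scalars.end()) {
            return false;
        }
        out = it->second;
        return true;
    }

    // Stand-in for llama_model_loader::get_arr_n: reports the length of an array-valued key.
    static bool read_arr_n(const gguf_meta & meta, const std::string & key, uint32_t & out) {
        auto it = meta.arrays.find(key);
        if (it == meta.arrays.end()) {
            return false;
        }
        out = (uint32_t) it->second.size();
        return true;
    }

    int main() {
        // A model with no explicit vocab-size key; only the tokenizer token list is present.
        gguf_meta meta;
        meta.arrays["tokenizer.ggml.tokens"] = std::vector<std::string>(128256, "tok");

        uint32_t n_vocab = 0;
        // Same idiom as the patch: try the explicit key first; the short-circuit ||
        // only counts the tokenizer list when that key is absent.
        read_u32(meta, "llama.vocab_size", n_vocab) || read_arr_n(meta, "tokenizer.ggml.tokens", n_vocab);

        std::cout << "n_vocab = " << n_vocab << "\n"; // prints 128256

        return 0;
    }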