model: mistral small 4 support (#20649)

* model: mistral small 4 support

* fix test

* fix test (2)

* Apply suggestions from code review

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update convert_hf_to_gguf.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* change newline

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
Xuan-Son Nguyen 2026-03-17 00:31:14 +01:00 committed by GitHub
parent 45172df4d6
commit d34ff7eb5b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 133 additions and 42 deletions

View file

@ -1587,6 +1587,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
}
} break;
case LLM_ARCH_DEEPSEEK2:
case LLM_ARCH_MISTRAL4:
{
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B, Kanana-2-30B-A3B
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26 || (hparams.n_layer == 48 && n_vocab == 128256));
@ -4883,6 +4884,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
}
} break;
case LLM_ARCH_DEEPSEEK2:
case LLM_ARCH_MISTRAL4:
{
const bool is_mla = hparams.is_mla();
@ -7850,7 +7852,7 @@ void llama_model::print_info() const {
LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
}
if (arch == LLM_ARCH_DEEPSEEK2 || arch == LLM_ARCH_GLM_DSA) {
if (arch == LLM_ARCH_DEEPSEEK2 || arch == LLM_ARCH_GLM_DSA || arch == LLM_ARCH_MISTRAL4) {
LLAMA_LOG_INFO("%s: n_layer_dense_lead = %d\n", __func__, hparams.n_layer_dense_lead);
LLAMA_LOG_INFO("%s: n_lora_q = %d\n", __func__, hparams.n_lora_q);
LLAMA_LOG_INFO("%s: n_lora_kv = %d\n", __func__, hparams.n_lora_kv);
@ -8428,6 +8430,7 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
} break;
case LLM_ARCH_DEEPSEEK2:
case LLM_ARCH_GLM_DSA:
case LLM_ARCH_MISTRAL4:
{
llm = std::make_unique<llm_build_deepseek2>(*this, params);
} break;
@ -8839,6 +8842,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
case LLM_ARCH_ERNIE4_5:
case LLM_ARCH_ERNIE4_5_MOE:
case LLM_ARCH_MISTRAL3:
case LLM_ARCH_MISTRAL4:
case LLM_ARCH_LLAMA_EMBED:
case LLM_ARCH_MAINCODER:
case LLM_ARCH_GLM_DSA: