From 4bb625b713fd9b294b4f7af87eaa752b710c7cc1 Mon Sep 17 00:00:00 2001 From: Ryan Mangeno <160974989+ryan-mangeno@users.noreply.github.com> Date: Thu, 10 Jul 2025 13:41:00 -0400 Subject: [PATCH] Smoldocling support (#14597) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * support for smoldocling * fixed merge conflicts * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Gabe Goodhart * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Gabe Goodhart * merge conflicts * pre tokenizer merge fix * convert : fix smollm3 jinja template (#14586) Signed-off-by: ryan-mangeno * support for smoldocling Signed-off-by: ryan-mangeno * fixed merge conflicts Signed-off-by: ryan-mangeno * Update src/llama-vocab.cpp Co-authored-by: Sigbjørn Skjæret * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Sigbjørn Skjæret * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Sigbjørn Skjæret * Update src/llama-model.h Co-authored-by: Sigbjørn Skjæret * safetensors tensor mapping Signed-off-by: ryan-mangeno * added back accidental removal of clean spaces for hunyuan * Update src/llama-vocab.cpp Co-authored-by: Sigbjørn Skjæret * updated hash and reordered model list * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Sigbjørn Skjæret * Update src/llama-vocab.cpp Co-authored-by: Sigbjørn Skjæret * Update include/llama.h Co-authored-by: Sigbjørn Skjæret * Update convert_hf_to_gguf.py Co-authored-by: Sigbjørn Skjæret * Update convert_hf_to_gguf_update.py Co-authored-by: Sigbjørn Skjæret * Update src/llama-vocab.cpp Co-authored-by: Sigbjørn Skjæret * removed old tensor name * removed tensor mappings -> handled by smolvlm * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Sigbjørn Skjæret * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Sigbjørn Skjæret * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Sigbjørn Skjæret --------- Signed-off-by: ryan-mangeno Co-authored-by: Gabe Goodhart Co-authored-by: Xuan-Son 
Nguyen Co-authored-by: Sigbjørn Skjæret Co-authored-by: compilade --- src/llama-model.cpp | 2 ++ src/llama-model.h | 1 + src/llama-vocab.cpp | 2 ++ 3 files changed, 5 insertions(+) diff --git a/src/llama-model.cpp b/src/llama-model.cpp index ca094e47b..f7211ac6c 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -40,6 +40,7 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_190M: return "190M"; case LLM_TYPE_220M: return "220M"; case LLM_TYPE_250M: return "250M"; + case LLM_TYPE_256M: return "256M"; case LLM_TYPE_270M: return "270M"; case LLM_TYPE_335M: return "335M"; case LLM_TYPE_410M: return "410M"; @@ -581,6 +582,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { case 22: type = LLM_TYPE_1B; break; case 26: type = LLM_TYPE_3B; break; case 28: type = LLM_TYPE_3B; break; // Llama 3.2 3B + case 30: type = LLM_TYPE_256M; break; // smoldocling 256M // granite uses a vocab with len 49152 case 32: type = n_vocab == 49152 ? LLM_TYPE_3B : (n_vocab < 40000 ? LLM_TYPE_7B : LLM_TYPE_8B); break; case 36: type = LLM_TYPE_8B; break; // granite diff --git a/src/llama-model.h b/src/llama-model.h index 453f5af62..431efbd51 100644 --- a/src/llama-model.h +++ b/src/llama-model.h @@ -32,6 +32,7 @@ enum llm_type { LLM_TYPE_190M, LLM_TYPE_220M, LLM_TYPE_250M, + LLM_TYPE_256M, LLM_TYPE_270M, LLM_TYPE_335M, LLM_TYPE_410M, diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 6aa1d901c..10823b183 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -1846,6 +1846,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<EOT>" || t.first == "_<EOT>" || t.first == "<|end▁of▁sentence|>" // DeepSeek + || t.first == "<end_of_utterance>" // smoldocling ) { special_eot_id = t.second; if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { @@ -2005,6 +2006,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<EOT>" || t.first == "_<EOT>" || t.first == "<|end_of_text|>" + || t.first == "<end_of_utterance>" // 
smoldocling ) { special_eog_ids.insert(t.second); if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {