Smoldocling support (#14597)

* support for smoldocling

* fixed merge conflicts

* Update gguf-py/gguf/tensor_mapping.py

Co-authored-by: Gabe Goodhart <gabe.l.hart@gmail.com>

* Update gguf-py/gguf/tensor_mapping.py

Co-authored-by: Gabe Goodhart <gabe.l.hart@gmail.com>

* merge conflicts

* pre tokenizer merge fix

* convert : fix smollm3 jinja template (#14586)

Signed-off-by: ryan-mangeno <ryanmangeno@gmail.com>

* support for smoldocling

Signed-off-by: ryan-mangeno <ryanmangeno@gmail.com>

* fixed merge conflicts

Signed-off-by: ryan-mangeno <ryanmangeno@gmail.com>

* Update src/llama-vocab.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update gguf-py/gguf/tensor_mapping.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update gguf-py/gguf/tensor_mapping.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update src/llama-model.h

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* safetensors tensor mapping

Signed-off-by: ryan-mangeno <ryanmangeno@gmail.com>

* restored clean spaces for hunyuan (had been accidentally removed)

* Update src/llama-vocab.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* updated hash and reordered model list

* Update gguf-py/gguf/tensor_mapping.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update src/llama-vocab.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update include/llama.h

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update convert_hf_to_gguf.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update convert_hf_to_gguf_update.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update src/llama-vocab.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* removed old tensor name

* removed tensor mappings -> handled by smolvlm

* Update gguf-py/gguf/tensor_mapping.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update gguf-py/gguf/tensor_mapping.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update gguf-py/gguf/tensor_mapping.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

---------

Signed-off-by: ryan-mangeno <ryanmangeno@gmail.com>
Co-authored-by: Gabe Goodhart <gabe.l.hart@gmail.com>
Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Co-authored-by: compilade <git@compilade.net>
This commit is contained in:
Ryan Mangeno 2025-07-10 13:41:00 -04:00 committed by GitHub
parent 11ee0fea2a
commit 4bb625b713
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 5 additions and 0 deletions

View file

@@ -40,6 +40,7 @@ const char * llm_type_name(llm_type type) {
         case LLM_TYPE_190M: return "190M";
         case LLM_TYPE_220M: return "220M";
         case LLM_TYPE_250M: return "250M";
+        case LLM_TYPE_256M: return "256M";
         case LLM_TYPE_270M: return "270M";
         case LLM_TYPE_335M: return "335M";
         case LLM_TYPE_410M: return "410M";
@@ -581,6 +582,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 22: type = LLM_TYPE_1B; break;
                     case 26: type = LLM_TYPE_3B; break;
                     case 28: type = LLM_TYPE_3B; break; // Llama 3.2 3B
+                    case 30: type = LLM_TYPE_256M; break; // smoldocling 256M
                     // granite uses a vocab with len 49152
                     case 32: type = n_vocab == 49152 ? LLM_TYPE_3B : (n_vocab < 40000 ? LLM_TYPE_7B : LLM_TYPE_8B); break;
                     case 36: type = LLM_TYPE_8B; break; // granite

View file

@@ -32,6 +32,7 @@ enum llm_type {
     LLM_TYPE_190M,
     LLM_TYPE_220M,
     LLM_TYPE_250M,
+    LLM_TYPE_256M,
     LLM_TYPE_270M,
     LLM_TYPE_335M,
     LLM_TYPE_410M,

View file

@@ -1846,6 +1846,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "<EOT>"
                         || t.first == "_<EOT>"
                         || t.first == "<end▁of▁sentence>" // DeepSeek
+                        || t.first == "<end_of_utterance>" // smoldocling
                    ) {
                     special_eot_id = t.second;
                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -2005,6 +2006,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "<EOT>"
                         || t.first == "_<EOT>"
                         || t.first == "<|end_of_text|>"
+                        || t.first == "<end_of_utterance>" // smoldocling
                    ) {
                     special_eog_ids.insert(t.second);
                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {