vocab : JetBrains Mellum pre-tokenizer (#15045)

This commit is contained in:
Csaba Kecskemeti 2025-08-03 12:38:18 -07:00 committed by GitHub
parent 83bc2f288c
commit 97366dc6ab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 6 additions and 1 deletions

View file

@ -1856,7 +1856,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
tokenizer_pre == "gigachat" ||
tokenizer_pre == "jina-v2-es" ||
tokenizer_pre == "jina-v2-de" ||
tokenizer_pre == "a.x-4.0") {
tokenizer_pre == "a.x-4.0" ||
tokenizer_pre == "mellum") {
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
} else if (
tokenizer_pre == "jina-v1-en" ||