vocab : add midm-2.0 model pre-tokenizer (#14626)

This commit is contained in:
Dowon 2025-07-11 16:36:04 +09:00 committed by GitHub
parent 0aedae00e6
commit 576c82eda2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 6 additions and 1 deletions

View file

@ -129,6 +129,7 @@ models = [
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
{"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
{"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
]
# some models are known to be broken upstream, so we will skip them as exceptions