mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-16 19:59:16 +00:00
model : add sarvam_moe architecture support (#20275)
This commit is contained in:
parent
65d7a8bbf0
commit
1e5ad35d56
4 changed files with 46 additions and 0 deletions
|
|
@ -1570,6 +1570,9 @@ class TextModel(ModelBase):
|
|||
if chkhsh == "862f827721df956049dff5ca81a57f29e575280bc622e290d3bf4e35eca29015":
|
||||
# ref: https://huggingface.co/codefuse-ai/F2LLM-v2-4B
|
||||
res = "f2llmv2"
|
||||
if chkhsh == "62f6fb0a6fd5098caeabb19b07a5c1099cafc8b9c40eab6ea89ece4ec02fbc57":
|
||||
# ref: https://huggingface.co/sarvamai/sarvam-30b
|
||||
res = "sarvam-moe"
|
||||
|
||||
if res is None:
|
||||
logger.warning("\n")
|
||||
|
|
@ -11591,6 +11594,34 @@ class BailingMoeV2Model(TextModel):
|
|||
raise ValueError(f"Unprocessed experts: {experts}")
|
||||
|
||||
|
||||
@ModelBase.register("SarvamMoEForCausalLM", "modeling_sarvam_moe.SarvamMoEForCausalLM")
|
||||
class SarvamMoEModel(BailingMoeV2Model):
|
||||
model_arch = gguf.MODEL_ARCH.BAILINGMOE2
|
||||
# Sarvam-MoE shares the BailingMoeV2 architecture; only differences:
|
||||
# - full rotary (no partial_rotary_factor)
|
||||
# - expert bias is zero-mean normalized at load time
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
super().set_gguf_parameters()
|
||||
hparams = self.hparams
|
||||
if (rope_dim := hparams.get("head_dim")) is None:
|
||||
rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
|
||||
# Override the partial-rotary value written by BailingMoeV2 with the full rotary dim
|
||||
self.gguf_writer.add_rope_dimension_count(rope_dim)
|
||||
|
||||
@classmethod
|
||||
def filter_tensors(cls, item: tuple[str, Callable[[], Tensor]]) -> tuple[str, Callable[[], Tensor]] | None:
|
||||
name, gen = item
|
||||
if name.endswith(".expert_bias"):
|
||||
# Sarvam normalizes expert bias to zero mean
|
||||
inner = gen
|
||||
|
||||
def gen():
|
||||
t = inner()
|
||||
return t - t.mean()
|
||||
return super().filter_tensors((name, gen))
|
||||
|
||||
|
||||
@ModelBase.register("GroveMoeForCausalLM", "modeling_grove_moe.GroveMoeForCausalLM")
|
||||
class GroveMoeModel(TextModel):
|
||||
model_arch = gguf.MODEL_ARCH.GROVEMOE
|
||||
|
|
|
|||
|
|
@ -155,6 +155,7 @@ models = [
|
|||
{"name": "joyai-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
|
||||
{"name": "kanana2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601", },
|
||||
{"name": "f2llmv2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/codefuse-ai/F2LLM-v2-4B", },
|
||||
{"name": "sarvam-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sarvamai/sarvam-30b", },
|
||||
]
|
||||
|
||||
# some models are known to be broken upstream, so we will skip them as exceptions
|
||||
|
|
|
|||
|
|
@ -503,6 +503,14 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|||
};
|
||||
byte_encode = false; // uses raw UTF-8, not GPT-2 byte encoding
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_SARVAM_MOE:
|
||||
// Sarvam uses SPM-style BPE (same shape as Gemma4): spaces replaced with U+2581
|
||||
// by the normalizer, BPE merges over the whole text on raw UTF-8.
|
||||
regex_exprs = {
|
||||
"[^\\n]+|[\\n]+",
|
||||
};
|
||||
byte_encode = false;
|
||||
break;
|
||||
default:
|
||||
// default regex for BPE tokenization pre-processing
|
||||
regex_exprs = {
|
||||
|
|
@ -2005,6 +2013,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
tokenizer_pre == "gemma4") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_GEMMA4;
|
||||
escape_whitespaces = true;
|
||||
} else if (
|
||||
tokenizer_pre == "sarvam-moe") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_SARVAM_MOE;
|
||||
escape_whitespaces = true;
|
||||
clean_spaces = false;
|
||||
} else if (
|
||||
tokenizer_pre == "jina-v1-en" ||
|
||||
tokenizer_pre == "jina-v2-code" ||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ enum llama_vocab_pre_type {
|
|||
LLAMA_VOCAB_PRE_TYPE_JOYAI_LLM = 48,
|
||||
LLAMA_VOCAB_PRE_TYPE_JAIS2 = 49,
|
||||
LLAMA_VOCAB_PRE_TYPE_GEMMA4 = 50,
|
||||
LLAMA_VOCAB_PRE_TYPE_SARVAM_MOE = 51,
|
||||
};
|
||||
|
||||
struct LLM_KV;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue