from __future__ import annotations

from .base import ModelBase, TextModel, gguf


@ModelBase.register("OrionForCausalLM")
class OrionModel(TextModel):
    """Text model registered under the ``OrionForCausalLM`` architecture name.

    Uses ``gguf.MODEL_ARCH.ORION`` as its target architecture.
    """

    model_arch = gguf.MODEL_ARCH.ORION

    def set_vocab(self):
        """Set up the vocabulary by delegating to the SentencePiece helper."""
        self._set_vocab_sentencepiece()

    def set_gguf_parameters(self):
        """Write the model hyperparameters from ``self.hparams`` to the GGUF writer.

        The context length is taken from the first of these keys present in
        ``self.hparams``: ``max_sequence_length``, ``max_position_embeddings``,
        ``model_max_length``.

        Raises:
            ValueError: If none of the context-length keys is present.
            KeyError: If a required hyperparameter key is missing.
        """
        hparams = self.hparams
        n_head = hparams["num_attention_heads"]
        # Fall back to the attention head count when no separate KV head
        # count is configured.
        n_head_kv = hparams.get("num_key_value_heads", n_head)

        # Candidate keys in priority order; the first one found wins.
        for ctx_key in (
            "max_sequence_length",
            "max_position_embeddings",
            "model_max_length",
        ):
            if ctx_key in hparams:
                n_ctx = hparams[ctx_key]
                break
        else:
            raise ValueError("gguf: can not find ctx length parameter.")

        writer = self.gguf_writer
        writer.add_file_type(self.ftype)
        writer.add_tensor_data_layout("Meta AI original pth")
        writer.add_context_length(n_ctx)
        writer.add_embedding_length(hparams["hidden_size"])
        writer.add_block_count(self.block_count)
        writer.add_feed_forward_length(hparams["intermediate_size"])
        writer.add_head_count(n_head)
        writer.add_head_count_kv(n_head_kv)
        # note: config provides rms norm but it is actually layer norm
        # ref: https://huggingface.co/OrionStarAI/Orion-14B-Chat/blob/276a17221ce42beb45f66fac657a41540e71f4f5/modeling_orion.py#L570-L571
        writer.add_layer_norm_eps(hparams["rms_norm_eps"])