mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-17 04:09:19 +00:00
* move conversion code to a dedicated conversion directory and split the files akin to the src/models architecture --------- Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
37 lines
1.6 KiB
Python
37 lines
1.6 KiB
Python
from __future__ import annotations
|
|
|
|
from .base import ModelBase, TextModel, gguf
|
|
|
|
|
|
@ModelBase.register("OrionForCausalLM")
class OrionModel(TextModel):
    """Text-model converter registered under the name "OrionForCausalLM".

    Reads hyperparameters from ``self.hparams`` and records them through
    ``self.gguf_writer`` using the ``ORION`` model architecture.
    """

    model_arch = gguf.MODEL_ARCH.ORION

    def set_vocab(self):
        """Set up the vocabulary by delegating to ``_set_vocab_sentencepiece``."""
        self._set_vocab_sentencepiece()

    def set_gguf_parameters(self):
        """Write the model hyperparameters to ``self.gguf_writer``.

        The context length is taken from the first of these keys found in
        ``self.hparams``: ``max_sequence_length``,
        ``max_position_embeddings``, ``model_max_length``.

        Raises:
            KeyError: if a required hyperparameter (e.g.
                ``num_attention_heads``) is absent.
            ValueError: if none of the context-length keys is present.
        """
        hp = self.hparams

        n_head = hp["num_attention_heads"]
        # Fall back to the attention head count when no separate KV head
        # count is configured.
        n_head_kv = hp.get("num_key_value_heads", n_head)

        # Probe the candidate keys in priority order; the first hit wins.
        for ctx_key in (
            "max_sequence_length",
            "max_position_embeddings",
            "model_max_length",
        ):
            if ctx_key in hp:
                n_ctx = hp[ctx_key]
                break
        else:
            raise ValueError("gguf: can not find ctx length parameter.")

        writer = self.gguf_writer
        writer.add_file_type(self.ftype)
        writer.add_tensor_data_layout("Meta AI original pth")
        writer.add_context_length(n_ctx)
        writer.add_embedding_length(hp["hidden_size"])
        writer.add_block_count(self.block_count)
        writer.add_feed_forward_length(hp["intermediate_size"])
        writer.add_head_count(n_head)
        writer.add_head_count_kv(n_head_kv)
        # The config key is named "rms_norm_eps", but the model actually
        # applies layer norm, so the value is stored as the layer-norm epsilon.
        # ref: https://huggingface.co/OrionStarAI/Orion-14B-Chat/blob/276a17221ce42beb45f66fac657a41540e71f4f5/modeling_orion.py#L570-L571
        writer.add_layer_norm_eps(hp["rms_norm_eps"])
|