Fix for Jamba models - they have recurrent layers like RWKV, so context shifting and forwarding won't work on them.

This commit is contained in:
Concedo 2025-07-12 10:05:15 +08:00
parent e9473305d0
commit 5a3b2e3921
3 changed files with 12 additions and 7 deletions

View file

@@ -324,6 +324,10 @@ void print_tok_vec(std::vector<float> &embd)
{
fileformatmeta->model_architecture = GGUFArch::ARCH_MAMBA;
}
else if(modelarch=="jamba")
{
fileformatmeta->model_architecture = GGUFArch::ARCH_JAMBA;
}
else if(modelarch=="llama" && freq_base_train==10000.0f && (n_tensors==435 || n_tensors==611))
{
fileformatmeta->model_architecture = GGUFArch::ARCH_SOLAR;