mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 00:54:41 +00:00
Gradient rope formula with offsets (#938)
* Gradient rope formula with offsets:
  positive for Solar models, negative for Llama 1 and 2 models.
* Update gpttype_adapter.cpp: remove L1/L2.
* Clean up PR, skip llama models, keep prints behind debug mode.
---------
Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
This commit is contained in:
parent
dd5cda06b7
commit
cb2336f5d9
3 changed files with 61 additions and 10 deletions
|
@ -306,10 +306,16 @@ void print_tok_vec(std::vector<float> &embd)
|
|||
{
|
||||
fileformatmeta->model_architecture = GGUFArch::ARCH_MAMBA;
|
||||
}
|
||||
else if(modelarch=="llama" && freq_base_train==10000.0f && n_tensors==435)
|
||||
else if(modelarch=="llama" && freq_base_train==10000.0f && (n_tensors==435 || n_tensors==611))
|
||||
{
|
||||
fileformatmeta->model_architecture = GGUFArch::ARCH_SOLAR;
|
||||
}
|
||||
else if(modelarch=="llama" && freq_base_train==10000.0f)
|
||||
{
|
||||
fileformatmeta->model_architecture = GGUFArch::ARCH_MISTRAL_LLAMA_1_AND_2;
|
||||
}
|
||||
printf("Arch Category: %d\n",fileformatmeta->model_architecture);
|
||||
|
||||
}
|
||||
|
||||
gguf_free(ctx);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue