Gradient rope formula with offsets (#938)

* Gradient rope formula with offsets

Positive for Solar models
Negative for Llama 1 and 2 models

* Update gpttype_adapter.cpp

Remove L1/L2

* cleanup PR, skip llama models, keep prints behind debug mode

---------

Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
This commit is contained in:
Nexesenex 2024-06-25 14:46:34 +02:00 committed by GitHub
parent dd5cda06b7
commit cb2336f5d9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 61 additions and 10 deletions

View file

@@ -52,11 +52,12 @@ enum FileFormat
// Identifies a GGUF model architecture variant (per the enum name; how each
// value is consumed is not visible from this excerpt).
// Every enumerator carries an explicit value, so the numeric values do not
// depend on declaration order.
enum GGUFArch
{
    ARCH_DEFAULT = 0, //used for llama3 and other generic gguf
    ARCH_FALCON = 1,
    ARCH_PHI = 2,
    ARCH_MAMBA = 3,
    ARCH_SOLAR = 4,
    // NOTE(review): presumably tags Mistral and Llama 1/2 models separately from
    // ARCH_DEFAULT so they can be handled differently - confirm at use sites.
    ARCH_MISTRAL_LLAMA_1_AND_2 = 5,
};
struct FileFormatExtraMeta