Gradient rope formula with offsets (#938)

* Gradient rope formula with offsets

Positive offset for Solar models
Negative offset for Llama 1 and 2 models

* Update gpttype_adapter.cpp

Remove the Llama 1/2 (L1/L2) handling

* cleanup PR, skip llama models, keep prints behind debug mode

---------

Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
This commit is contained in:
Nexesenex 2024-06-25 14:46:34 +02:00 committed by GitHub
parent dd5cda06b7
commit cb2336f5d9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 61 additions and 10 deletions

View file

@ -306,10 +306,16 @@ void print_tok_vec(std::vector<float> &embd)
{
fileformatmeta->model_architecture = GGUFArch::ARCH_MAMBA;
}
else if(modelarch=="llama" && freq_base_train==10000.0f && n_tensors==435)
else if(modelarch=="llama" && freq_base_train==10000.0f && (n_tensors==435 || n_tensors==611))
{
fileformatmeta->model_architecture = GGUFArch::ARCH_SOLAR;
}
else if(modelarch=="llama" && freq_base_train==10000.0f)
{
fileformatmeta->model_architecture = GGUFArch::ARCH_MISTRAL_LLAMA_1_AND_2;
}
printf("Arch Category: %d\n",fileformatmeta->model_architecture);
}
gguf_free(ctx);