switched to NTK aware scaling

2025-09-11 01:24:36 +00:00 · 2023-07-02 17:25:08 +08:00 · 2023-07-02 17:25:08 +08:00 · e17c8497cf
commit e17c8497cf
parent e19483ca0f
4 changed files with 26 additions and 25 deletions
--- a/ggml.c
+++ b/ggml.c
@@ -4242,6 +4242,22 @@ static inline int ggml_up(int n, int m) {
 #define ggml_assert_aligned(ptr) \
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)

+// RoPE per-pair angle multiplier base^(-2/n_dims); base is 10000, NTK-scaled when n_ctx > 2048.
+// n_past is accepted for interface compatibility and does not affect the result.
+float get_theta_scale(int n_dims,int n_past,int n_ctx)
+{
+    (void) n_past;
+    // For n_dims == 2 the NTK exponent n_dims/(n_dims-2) divides by zero and the result
+    // would collapse to 0; such tiny sizes use the unscaled base like the short-context case.
+    if (n_ctx <= 2048 || n_dims <= 2) {
+        return powf(10000.0, -2.0f/n_dims);
+    }
+    // NTK-aware scaling: multiply the base by a^(n_dims/(n_dims-2)); a = 4 up to 4096 ctx, else 8
+    float a = (n_ctx<=4096?4.0:8.0);
+    float m = powf(a, n_dims / (n_dims - 2.0));
+    return powf(10000.0 * m, -2.0f/n_dims);
+}
+
 ////////////////////////////////////////////////////////////////////////////////

 struct ggml_context * ggml_init(struct ggml_init_params params) {
@@ -12531,7 +12547,7 @@ static void ggml_compute_forward_rope_f32(
    // row index used to determine which thread to use
    int ir = 0;

-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+    const float theta_scale = get_theta_scale(n_dims,n_past,n_ctx);

    const bool is_neox = mode & 2;
    const bool is_glm  = mode & 4;
@@ -12571,9 +12587,7 @@ static void ggml_compute_forward_rope_f32(
                        dst_data[n_dims/2*3] = x2*sin_block_theta + x3*cos_block_theta;
                    }
                } else if (!is_neox) {
-                    if (n_ctx > GGML_TRAINING_CTX) {
-                        theta = theta * GGML_TRAINING_CTX / n_ctx;
-                    }
+
                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                        const float cos_theta = cosf(theta);
                        const float sin_theta = sinf(theta);
@@ -12674,7 +12688,7 @@ static void ggml_compute_forward_rope_f16(
    // row index used to determine which thread to use
    int ir = 0;

-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+    const float theta_scale = get_theta_scale(n_dims,n_past,n_ctx);

    const bool is_neox = mode & 2;
    const bool is_glm  = mode & 4;
@@ -12714,9 +12728,6 @@ static void ggml_compute_forward_rope_f16(
                        dst_data[n_dims/2*3] = GGML_FP32_TO_FP16(x2*sin_block_theta + x3*cos_block_theta);
                    }
                } if (!is_neox) {
-                    if (n_ctx > GGML_TRAINING_CTX) {
-                        theta = theta * GGML_TRAINING_CTX / n_ctx;
-                    }
                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                        const float cos_theta = cosf(theta);
                        const float sin_theta = sinf(theta);
@@ -12842,7 +12853,7 @@ static void ggml_compute_forward_rope_back_f32(
    // row index used to determine which thread to use
    int ir = 0;

-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+    const float theta_scale = get_theta_scale(n_dims,n_past,n_ctx);

    const bool is_neox = mode & 2;

@@ -12856,9 +12867,6 @@ static void ggml_compute_forward_rope_back_f32(
                float theta = (float)p;

                if (!is_neox) {
-                    if (n_ctx > GGML_TRAINING_CTX) {
-                        theta = theta * GGML_TRAINING_CTX / n_ctx;
-                    }
                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                        const float cos_theta = cosf(theta);
                        const float sin_theta = sinf(theta);
@@ -12959,7 +12967,7 @@ static void ggml_compute_forward_rope_back_f16(
    // row index used to determine which thread to use
    int ir = 0;

-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+    const float theta_scale = get_theta_scale(n_dims,n_past,n_ctx);

    const bool is_neox = mode & 2;

@@ -12973,9 +12981,6 @@ static void ggml_compute_forward_rope_back_f16(
                float theta = (float)p;

                if (!is_neox) {
-                    if (n_ctx > GGML_TRAINING_CTX) {
-                        theta = theta * GGML_TRAINING_CTX / n_ctx;
-                    }
                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                        const float cos_theta = cosf(theta);
                        const float sin_theta = sinf(theta);