diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 7aa2c4bd4..7e344f4b8 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -390,12 +390,12 @@ extern "C" {
         GGML_TYPE_F64     = 28,
         GGML_TYPE_IQ1_M   = 29,
         GGML_TYPE_BF16    = 30,
-        // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
-        // GGML_TYPE_Q4_0_4_8 = 32,
-        // GGML_TYPE_Q4_0_8_8 = 33,
+        GGML_TYPE_Q4_0_4_4 = 31, //deprecated upstream
+        GGML_TYPE_Q4_0_4_8 = 32, //deprecated upstream
+        GGML_TYPE_Q4_0_8_8 = 33, //deprecated upstream
         GGML_TYPE_TQ1_0   = 34,
         GGML_TYPE_TQ2_0   = 35,
-        // GGML_TYPE_IQ4_NL_4_4 = 36,
+        GGML_TYPE_IQ4_NL_4_4 = 36, //deprecated upstream
         // GGML_TYPE_IQ4_NL_4_8 = 37,
         // GGML_TYPE_IQ4_NL_8_8 = 38,
         GGML_TYPE_COUNT   = 39,
diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
index 386cd53d6..977b07585 100644
--- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
+++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
@@ -3692,7 +3692,14 @@ static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_in
     return out;
 }
 
+static bool kcpp_q_already_repacked = false; //to support legacy q4_0_M_N quants that were preconverted. Set by flag_aarch_prepacked_quant(); while true, the repack_* functions patched below memcpy their input instead of interleaving it. NOTE(review): the flag is file-global, not per-tensor, and is not cleared in this patch -- confirm plain Q4_0/IQ4_NL tensors are never loaded after it is set.
+
 static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
+    if(kcpp_q_already_repacked) //using legacy prepacked quant, so just copy it
+    {
+        memcpy(t->data, data, data_size);
+        return 0;
+    }
     GGML_ASSERT(t->type == GGML_TYPE_Q4_0);
     GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
     constexpr int nrows_interleaved = 4;
@@ -3724,6 +3731,11 @@ static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block
 }
 
 static int repack_q4_0_to_q4_0_8_bl(struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
+    if(kcpp_q_already_repacked) //using legacy prepacked quant, so just copy it
+    {
+        memcpy(t->data, data, data_size);
+        return 0;
+    }
     GGML_ASSERT(t->type == GGML_TYPE_Q4_0);
     GGML_ASSERT(interleave_block == 8);
     constexpr int nrows_interleaved = 8;
@@ -3790,6 +3802,11 @@ static block_iq4_nlx4 make_block_iq4_nlx4(block_iq4_nl * in, unsigned int blck_s
 }
 
 static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
+    if(kcpp_q_already_repacked) //using legacy prepacked quant, so just copy it
+    {
+        memcpy(t->data, data, data_size);
+        return 0;
+    }
     GGML_ASSERT(t->type == GGML_TYPE_IQ4_NL);
     //GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
     GGML_ASSERT(interleave_block == 4);
@@ -4143,6 +4160,15 @@ static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0;
 }
 }  // namespace ggml::cpu::aarch64
 
+// Sets kcpp_q_already_repacked and prints a warning the first time it is called; `type` is the numeric tensor type echoed in the message.
+static void flag_aarch_prepacked_quant(int type) {
+    if (kcpp_q_already_repacked) {
+        return; // flag already set: nothing to do, and the warning is not repeated
+    }
+    printf("\nWARNING! Legacy aarch64 prepacked QM_0_M_N quant (%d) detected! Please switch to Q4_0!\n",type);
+    kcpp_q_already_repacked = true;
+}
+
 static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * cur) {
     if (cur->type == GGML_TYPE_Q4_0) {
         if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
@@ -4167,6 +4193,26 @@ static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(con
             }
         }
     }
+    else if (cur->type == GGML_TYPE_Q4_0_4_4) //kcpp backport old quant support: legacy prepacked types select their matching traits directly
+    {
+        flag_aarch_prepacked_quant(cur->type); // sets the flag that makes the repack step copy the data as-is
+        return &ggml::cpu::aarch64::q4_0_4x4_q8_0;
+    }
+    else if (cur->type == GGML_TYPE_Q4_0_4_8)
+    {
+        flag_aarch_prepacked_quant(cur->type);
+        return &ggml::cpu::aarch64::q4_0_4x8_q8_0;
+    }
+    else if (cur->type == GGML_TYPE_Q4_0_8_8)
+    {
+        flag_aarch_prepacked_quant(cur->type);
+        return &ggml::cpu::aarch64::q4_0_8x8_q8_0;
+    }
+    else if (cur->type == GGML_TYPE_IQ4_NL_4_4) // test the prepacked enum, not plain IQ4_NL: plain IQ4_NL must still go through runtime repacking
+    {
+        flag_aarch_prepacked_quant(cur->type);
+        return &ggml::cpu::aarch64::iq4_nl_4x4_q8_0;
+    }
 
     return nullptr;
 }
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index fb56fb500..1077d8b91 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -792,23 +792,32 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
         .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
         .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
     },
-    [31] = { // GGML_TYPE_Q4_0_4_4
-        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
-        .blck_size                = 0,
-        .type_size                = 0,
-        .is_quantized             = false,
+    [GGML_TYPE_Q4_0_4_4] = { // deprecated upstream
+        .type_name                = "q4_0_4x4",
+        .blck_size                = QK4_0,
+        .blck_size_interleave     = 4,
+        .type_size                = sizeof(block_q4_0),
+        .is_quantized             = true,
+        .to_float                 = NULL,
+        .from_float_ref           = NULL,
     },
-    [32] = { // GGML_TYPE_Q4_0_4_8
-        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
-        .blck_size                = 0,
-        .type_size                = 0,
-        .is_quantized             = false,
+    [GGML_TYPE_Q4_0_4_8] = { // deprecated upstream
+        .type_name                = "q4_0_4x8",
+        .blck_size                = QK4_0,
+        .blck_size_interleave     = 8,
+        .type_size                = sizeof(block_q4_0),
+        .is_quantized             = true,
+        .to_float                 = NULL,
+        .from_float_ref           = NULL,
     },
-    [33] = { // GGML_TYPE_Q4_0_8_8
-        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
-        .blck_size                = 0,
-        .type_size                = 0,
-        .is_quantized             = false,
+    [GGML_TYPE_Q4_0_8_8] = { // deprecated upstream
+        .type_name                = "q4_0_8x8",
+        .blck_size                = QK4_0,
+        .blck_size_interleave     = 8,
+        .type_size                = sizeof(block_q4_0),
+        .is_quantized             = true,
+        .to_float                 = NULL,
+        .from_float_ref           = NULL,
     },
     [GGML_TYPE_TQ1_0] = {
         .type_name                = "tq1_0",
@@ -826,11 +835,14 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
         .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
         .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
     },
-    [36] = { // GGML_TYPE_IQ4_NL_4_4
-        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
-        .blck_size                = 0,
-        .type_size                = 0,
-        .is_quantized             = false,
+    [GGML_TYPE_IQ4_NL_4_4] = { // deprecated upstream
+        .type_name                = "iq4_nl_4x4",
+        .blck_size                = QK4_NL,
+        .blck_size_interleave     = 4,
+        .type_size                = sizeof(block_iq4_nl),
+        .is_quantized             = true,
+        .to_float                 = NULL,
+        .from_float_ref           = NULL,
     },
     [37] = { // GGML_TYPE_IQ4_NL_4_8
         .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",