From 271c4c332c0bb49e41aa078d1ec14369ebad841a Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 19 Apr 2026 20:40:07 +0800
Subject: [PATCH] hack to allow kokoro to remain functional even with much
 higher GGML_SCHED_MAX_SPLIT_INPUTS

---
 ggml/src/ggml-backend.cpp             | 12 ++++++++----
 otherarch/ttscpp/src/kokoro_model.cpp |  4 ++--
 otherarch/ttscpp/src/kokoro_model.h   | 10 ++++++++--
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 392281e85..76b235626 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -752,9 +752,9 @@ static bool ggml_is_view_op(enum ggml_op op) {
 #ifndef GGML_SCHED_MAX_BACKENDS
 #define GGML_SCHED_MAX_BACKENDS 16
 #endif
-//kcpp yolo fix: decreased from 30 to 16 in order to try resolve tts oom issues.
+//kcpp: this was decreased from 30 to 16 to try to resolve tts oom issues. edit: restored to 30; kokoro is now handled by the kcpp_kokoro_alloc_hack flag instead
 #ifndef GGML_SCHED_MAX_SPLIT_INPUTS
-#define GGML_SCHED_MAX_SPLIT_INPUTS 16
+#define GGML_SCHED_MAX_SPLIT_INPUTS 30
 #endif
 
 #ifndef GGML_SCHED_MAX_COPIES
@@ -1731,6 +1731,9 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
     return GGML_STATUS_SUCCESS;
 }
 
+bool kcpp_kokoro_alloc_hack = false; //kcpp: while true, ggml_backend_sched_new divides the split-input portion of its allocations by 4. The kokoro allocation is too big due to the massive graph, but there is nowhere else we can patch this.
+//Kokoro doesn't need such a big allocation as there are not many graph splits, so just shrink the allocation while the flag is set.
+
 ggml_backend_sched_t ggml_backend_sched_new(
         ggml_backend_t * backends,
         ggml_backend_buffer_type_t * bufts,
@@ -1764,7 +1767,8 @@ ggml_backend_sched_t ggml_backend_sched_new(
     sched->hv_tensor_copies      = (ggml_tensor **) malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
 
     const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph
-    const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
+    const size_t alloc_mul_adjust = (kcpp_kokoro_alloc_hack?4:1); //kcpp: kokoro needs this as the graph size is too big
+    const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2/alloc_mul_adjust;
     sched->node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
     sched->leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
     sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
@@ -1773,7 +1777,7 @@ ggml_backend_sched_t ggml_backend_sched_new(
     sched->debug_graph_size = 0;
     sched->debug_prev_graph_size = 0;
 
-    sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
+    sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor)/alloc_mul_adjust + ggml_graph_overhead_custom(graph_size, false);
     sched->context_buffer = (char *) malloc(sched->context_buffer_size);
 
     const int initial_splits_capacity = 16;
diff --git a/otherarch/ttscpp/src/kokoro_model.cpp b/otherarch/ttscpp/src/kokoro_model.cpp
index eef44ae00..c7543ac93 100644
--- a/otherarch/ttscpp/src/kokoro_model.cpp
+++ b/otherarch/ttscpp/src/kokoro_model.cpp
@@ -1489,7 +1489,7 @@ struct kokoro_duration_context * build_new_duration_kokoro_context(struct kokoro
     kctx->backend_cpu = ggml_backend_cpu_init();
     kctx->set_threads();
     kctx->build_schedule();
-    kctx->buf_compute_meta.resize(ggml_tensor_overhead()*model->max_duration_nodes()*2 + ggml_graph_overhead_custom(model->max_duration_nodes()*2, false));
+    kctx->buf_compute_meta.resize(ggml_tensor_overhead()*model->max_duration_nodes()*5 + ggml_graph_overhead_custom(model->max_duration_nodes()*5, false));
     return kctx;
 }
 
@@ -1499,6 +1499,6 @@ struct kokoro_context * build_new_kokoro_context(struct kokoro_model * model, in
     kctx->backend_cpu = ggml_backend_cpu_init();
     kctx->set_threads();
     kctx->build_schedule();
-    kctx->buf_compute_meta.resize(ggml_tensor_overhead()*model->max_gen_nodes()*20 + ggml_graph_overhead_custom(model->max_gen_nodes()*20, false));
+    kctx->buf_compute_meta.resize(ggml_tensor_overhead()*model->max_gen_nodes()*30 + ggml_graph_overhead_custom(model->max_gen_nodes()*30, false));
     return kctx;
 }
diff --git a/otherarch/ttscpp/src/kokoro_model.h b/otherarch/ttscpp/src/kokoro_model.h
index aa35291b7..a8359fff0 100644
--- a/otherarch/ttscpp/src/kokoro_model.h
+++ b/otherarch/ttscpp/src/kokoro_model.h
@@ -312,6 +312,8 @@ struct kokoro_ubatch {
     struct kokoro_duration_response * resp = nullptr;
 };
 
+extern bool kcpp_kokoro_alloc_hack;
+
 struct kokoro_duration_context : runner_context {
     kokoro_duration_context(kokoro_model * model, int n_threads): runner_context(n_threads), model(model) {};
     ~kokoro_duration_context() {
@@ -332,7 +334,9 @@ struct kokoro_duration_context : runner_context {
     struct ggml_tensor * token_types = nullptr;
 
     void build_schedule() {
-        runner_context::build_schedule(model->max_duration_nodes()*2);
+		kcpp_kokoro_alloc_hack = true; // kcpp: raise the global flag so ggml_backend_sched_new shrinks its split-input allocation; presumably reached through runner_context::build_schedule - TODO confirm
+        runner_context::build_schedule(model->max_duration_nodes()*5); // node budget multiplier raised from 2 to 5
+		kcpp_kokoro_alloc_hack = false; // restore default sizing for later scheduler creation. NOTE(review): plain unsynchronized global, not reset if the call above throws - confirm schedulers are never built concurrently
     }
 };
 
@@ -410,7 +414,9 @@ struct kokoro_context : runner_context {
     struct ggml_tensor * uv_noise_data;
 
     void build_schedule() {
-        runner_context::build_schedule(model->max_gen_nodes()*20);
+		kcpp_kokoro_alloc_hack = true; // kcpp: raise the global flag so ggml_backend_sched_new shrinks its split-input allocation; presumably reached through runner_context::build_schedule - TODO confirm
+        runner_context::build_schedule(model->max_gen_nodes()*30); // node budget multiplier raised from 20 to 30
+		kcpp_kokoro_alloc_hack = false; // restore default sizing for later scheduler creation. NOTE(review): plain unsynchronized global, not reset if the call above throws - confirm schedulers are never built concurrently
     }
 };