From 271c4c332c0bb49e41aa078d1ec14369ebad841a Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 19 Apr 2026 20:40:07 +0800 Subject: [PATCH] hack to allow kokoro to remain functional even with much higher GGML_SCHED_MAX_SPLIT_INPUTS --- ggml/src/ggml-backend.cpp | 12 ++++++++---- otherarch/ttscpp/src/kokoro_model.cpp | 4 ++-- otherarch/ttscpp/src/kokoro_model.h | 10 ++++++++-- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 392281e85..76b235626 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -752,9 +752,9 @@ static bool ggml_is_view_op(enum ggml_op op) { #ifndef GGML_SCHED_MAX_BACKENDS #define GGML_SCHED_MAX_BACKENDS 16 #endif -//kcpp yolo fix: decreased from 30 to 16 in order to try resolve tts oom issues. +//kcpp yolo fix: decreased from 30 to 16 in order to try resolve tts oom issues. edit: reverted, new hack to solve kokoro added as kcpp_kokoro_alloc_hack #ifndef GGML_SCHED_MAX_SPLIT_INPUTS -#define GGML_SCHED_MAX_SPLIT_INPUTS 16 +#define GGML_SCHED_MAX_SPLIT_INPUTS 30 #endif #ifndef GGML_SCHED_MAX_COPIES @@ -1731,6 +1731,9 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s return GGML_STATUS_SUCCESS; } +bool kcpp_kokoro_alloc_hack = false; //the kokoro allocation is too big due to the massive graph but there is nowhere else we can patch this +//it doesnt need such a big alloc as there are not many graph splits. So, just adjust the allocation if triggered + ggml_backend_sched_t ggml_backend_sched_new( ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, @@ -1764,7 +1767,8 @@ ggml_backend_sched_t ggml_backend_sched_new( sched->hv_tensor_copies = (ggml_tensor **) malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *)); const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph - const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2; + const size_t alloc_mul_adjust = (kcpp_kokoro_alloc_hack?4:1); //kcpp: kokoro needs this as the graph size is too big + const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2/alloc_mul_adjust; sched->node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->node_backend_ids[0])); sched->leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->leaf_backend_ids[0])); sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0])); @@ -1773,7 +1777,7 @@ ggml_backend_sched_t ggml_backend_sched_new( sched->debug_graph_size = 0; sched->debug_prev_graph_size = 0; - sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false); + sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor)/alloc_mul_adjust + ggml_graph_overhead_custom(graph_size, false); sched->context_buffer = (char *) malloc(sched->context_buffer_size); const int initial_splits_capacity = 16; diff --git a/otherarch/ttscpp/src/kokoro_model.cpp b/otherarch/ttscpp/src/kokoro_model.cpp index eef44ae00..c7543ac93 100644 --- a/otherarch/ttscpp/src/kokoro_model.cpp +++ b/otherarch/ttscpp/src/kokoro_model.cpp @@ -1489,7 +1489,7 @@ struct kokoro_duration_context * build_new_duration_kokoro_context(struct kokoro kctx->backend_cpu = ggml_backend_cpu_init(); kctx->set_threads(); kctx->build_schedule(); - kctx->buf_compute_meta.resize(ggml_tensor_overhead()*model->max_duration_nodes()*2 + ggml_graph_overhead_custom(model->max_duration_nodes()*2, false)); + kctx->buf_compute_meta.resize(ggml_tensor_overhead()*model->max_duration_nodes()*5 + ggml_graph_overhead_custom(model->max_duration_nodes()*5, false)); return kctx; } @@ -1499,6 +1499,6 @@ struct kokoro_context * build_new_kokoro_context(struct kokoro_model * model, in kctx->backend_cpu = ggml_backend_cpu_init(); kctx->set_threads(); kctx->build_schedule(); - kctx->buf_compute_meta.resize(ggml_tensor_overhead()*model->max_gen_nodes()*20 + ggml_graph_overhead_custom(model->max_gen_nodes()*20, false)); + kctx->buf_compute_meta.resize(ggml_tensor_overhead()*model->max_gen_nodes()*30 + ggml_graph_overhead_custom(model->max_gen_nodes()*30, false)); return kctx; } diff --git a/otherarch/ttscpp/src/kokoro_model.h b/otherarch/ttscpp/src/kokoro_model.h index aa35291b7..a8359fff0 100644 --- a/otherarch/ttscpp/src/kokoro_model.h +++ b/otherarch/ttscpp/src/kokoro_model.h @@ -312,6 +312,8 @@ struct kokoro_ubatch { struct kokoro_duration_response * resp = nullptr; }; +extern bool kcpp_kokoro_alloc_hack; + struct kokoro_duration_context : runner_context { kokoro_duration_context(kokoro_model * model, int n_threads): runner_context(n_threads), model(model) {}; ~kokoro_duration_context() { @@ -332,7 +334,9 @@ struct kokoro_duration_context : runner_context { struct ggml_tensor * token_types = nullptr; void build_schedule() { - runner_context::build_schedule(model->max_duration_nodes()*2); + kcpp_kokoro_alloc_hack = true; + runner_context::build_schedule(model->max_duration_nodes()*5); + kcpp_kokoro_alloc_hack = false; } }; @@ -410,7 +414,9 @@ struct kokoro_context : runner_context { struct ggml_tensor * uv_noise_data; void build_schedule() { - runner_context::build_schedule(model->max_gen_nodes()*20); + kcpp_kokoro_alloc_hack = true; + runner_context::build_schedule(model->max_gen_nodes()*30); + kcpp_kokoro_alloc_hack = false; } };