diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 2c5861f48..e9b513812 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -656,9 +656,9 @@ static bool ggml_is_view_op(enum ggml_op op) {
 #ifndef GGML_SCHED_MAX_BACKENDS
 #define GGML_SCHED_MAX_BACKENDS 16
 #endif
-
+// kcpp: speculative fix - decreased from 30 to 14 to try to resolve TTS OOM issues.
 #ifndef GGML_SCHED_MAX_SPLIT_INPUTS
-#define GGML_SCHED_MAX_SPLIT_INPUTS 30
+#define GGML_SCHED_MAX_SPLIT_INPUTS 14
 #endif
 
 #ifndef GGML_SCHED_MAX_COPIES
diff --git a/koboldcpp.py b/koboldcpp.py
index 6321972cb..b70a11426 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -64,7 +64,7 @@ dry_seq_break_max = 128
 extra_images_max = 4
 
 # global vars
-KcppVersion = "1.99.3"
+KcppVersion = "1.99.4"
 showdebug = True
 kcpp_instance = None #global running instance
 global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}
diff --git a/otherarch/tts_adapter.cpp b/otherarch/tts_adapter.cpp
index fc9e1d695..f9a09ef22 100644
--- a/otherarch/tts_adapter.cpp
+++ b/otherarch/tts_adapter.cpp
@@ -671,7 +671,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
 
     // tts init
     if (is_ttscpp_file) {
-        ttscpp_config = new generation_configuration("am_echo", 25, 1.0, 1.0, true, "", 2048, 1.0);
+        ttscpp_config = new generation_configuration("am_echo", 25, 1.0, 1.0, true, "", 1600, 1.0);
         ttscpp_runner = runner_from_file(modelfile_ttc, inputs.threads, ttscpp_config, true);
         if (ttscpp_runner == nullptr) {
             printf("\nTTS Load Error: Failed to initialize TTSCPP!\n");