From b0a8d11584ece89034081483d628486de4fdeaf8 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 24 Aug 2025 17:42:49 +0800
Subject: [PATCH] add tts max length for kokoro (+1 squashed commits)

Squashed commits:

[c1c6feaf] add tts max length for kokoro
---
Review notes (added during review; not part of the commit message):

* koboldcpp.py: KcppVersion changes from "1.98" to "1.98.1".
* otherarch/tts_adapter.cpp gains TruncateToFirstNumberWords(input, limit):
    - limit <= 0 returns "".
    - Otherwise it walks the matches of the regex \b[\w'-]+\b and, at match
      number `limit`, returns input.substr(0, end of that match). Everything
      after that match is dropped, including punctuation that directly
      follows it, even when the input holds exactly `limit` matches.
    - With fewer than `limit` matches the input is returned unchanged.
* ttstype_load_model(): the assignment tts_max_len = inputs.ttsmaxlen moves
  from after `nthreads = inputs.threads;` to right after
  `ttsdebugmode = inputs.debugmode;`, ahead of the `if (is_ttscpp_file)` check.
  NOTE(review): presumably so the limit is also set on the ttscpp path -
  confirm against the full function, which this patch does not show.
* ttstype_generate_ttscpp(): when tts_max_len > 0 the prompt is passed
  through TruncateToFirstNumberWords(prompt, tts_max_len) before the debug
  printf.
* NOTE(review): with std::regex over char strings, \w presumably matches
  only ASCII letters, digits and '_' in the default locale, so non-ASCII
  text may be counted differently from what a reader expects - TODO confirm
  for the languages the TTS model accepts.

 koboldcpp.py              |  2 +-
 otherarch/tts_adapter.cpp | 34 ++++++++++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 5e5df15a4..9bee1de34 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -64,7 +64,7 @@ dry_seq_break_max = 128
 extra_images_max = 4
 
 # global vars
-KcppVersion = "1.98"
+KcppVersion = "1.98.1"
 showdebug = True
 kcpp_instance = None #global running instance
 global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}
diff --git a/otherarch/tts_adapter.cpp b/otherarch/tts_adapter.cpp
index 0b2f63a2e..acf52ca3e 100644
--- a/otherarch/tts_adapter.cpp
+++ b/otherarch/tts_adapter.cpp
@@ -478,6 +478,32 @@ std::string trim_words(const std::string& input, const std::string& separator, s
     return result.str();
 }
 
+static std::string TruncateToFirstNumberWords(const std::string& input, int limit) {
+    static const std::regex wordRegex(R"(\b[\w'-]+\b)");
+    std::sregex_iterator words_begin(input.begin(), input.end(), wordRegex);
+    std::sregex_iterator words_end;
+    int count = 0;
+    std::size_t cutoffPos = std::string::npos;
+    if(limit<=0)
+    {
+        return "";
+    }
+    for (auto it = words_begin; it != words_end; ++it) {
+        ++count;
+        if (count >= limit) {
+            // position AFTER the last matched word
+            cutoffPos = it->position() + it->length();
+            break;
+        }
+    }
+    if (cutoffPos == std::string::npos) {
+        // fewer than N words, return original
+        return input;
+    }
+    // Preserve everything up to and including the Nth word
+    return input.substr(0, cutoffPos);
+}
+
 static llama_context * ttc_ctx = nullptr; //text to codes ctx
 static llama_context * cts_ctx = nullptr; //codes to speech
 
@@ -562,6 +588,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
     }
 
     ttsdebugmode = inputs.debugmode;
+    tts_max_len = inputs.ttsmaxlen;
 
     // tts init
     if (is_ttscpp_file) {
@@ -577,8 +604,6 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
 
     nthreads = inputs.threads;
 
-    tts_max_len = inputs.ttsmaxlen;
-
     tts_model_params.use_mmap = false;
     tts_model_params.use_mlock = false;
     tts_model_params.n_gpu_layers = inputs.gpulayers; //offload if possible
@@ -692,6 +717,11 @@ static tts_generation_outputs ttstype_generate_ttscpp(const tts_generation_input
         }
     }
 
+    if(tts_max_len>0)
+    {
+        prompt = TruncateToFirstNumberWords(prompt,tts_max_len);
+    }
+
     if(ttsdebugmode==1 && !tts_is_quiet)
     {
         printf("\nUsing Speaker ID: %d, Voice: %s", speaker_seed, voiceused.c_str());