From 3867db34bcee7f3be659d1cd4dae54215eaa2e15 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Fri, 22 Aug 2025 23:48:29 +0800 Subject: [PATCH] adjust kokoro default voices --- otherarch/tts_adapter.cpp | 2 +- otherarch/ttscpp/cli/cli.cpp | 2 +- otherarch/ttscpp/src/kokoro_model.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/otherarch/tts_adapter.cpp b/otherarch/tts_adapter.cpp index 264a5a686..0b2f63a2e 100644 --- a/otherarch/tts_adapter.cpp +++ b/otherarch/tts_adapter.cpp @@ -670,7 +670,7 @@ static tts_generation_outputs ttstype_generate_ttscpp(const tts_generation_input if(detectedarch=="kokoro") { - vmapper = {"am_echo","af_heart","af_alloy","bm_daniel","bf_isabella"}; + vmapper = {"am_echo","af_heart","af_nicole","bm_fable","bf_isabella"}; vpermitted = {"af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica", "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky", "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael", "am_onyx", "am_puck", "am_santa", "bf_alice", "bf_emma", "bf_isabella", "bf_lily", "bm_daniel", "bm_fable", "bm_george", "bm_lewis"}; } else if(detectedarch=="dia") diff --git a/otherarch/ttscpp/cli/cli.cpp b/otherarch/ttscpp/cli/cli.cpp index 980f1514b..f5a3c5364 100644 --- a/otherarch/ttscpp/cli/cli.cpp +++ b/otherarch/ttscpp/cli/cli.cpp @@ -39,7 +39,7 @@ int main(int argc, const char ** argv) { args.add_argument(bool_arg("--no-cross-attn", "(OPTIONAL) Whether to not include cross attention", "-ca")); args.add_argument(string_arg("--conditional-prompt", "(OPTIONAL) A distinct conditional prompt to use for generating. If none is provided the preencoded prompt is used. '--text-encoder-path' must be set to use conditional generation.", "-cp", false)); args.add_argument(string_arg("--text-encoder-path", "(OPTIONAL) The local path of the text encoder gguf model for conditional generaiton.", "-tep", false)); - args.add_argument(string_arg("--voice", "(OPTIONAL) The voice to use to generate the audio. This is only used for models with voice packs.", "-v", false, "af_alloy")); + args.add_argument(string_arg("--voice", "(OPTIONAL) The voice to use to generate the audio. This is only used for models with voice packs.", "-v", false, "af_heart")); args.add_argument(bool_arg("--vad", "(OPTIONAL) whether to apply voice inactivity detection (VAD) and strip silence form the end of the output (particularly useful for Parler TTS). By default, no VAD is applied.", "-va")); args.add_argument(string_arg("--espeak-voice-id", "(OPTIONAL) The espeak voice id to use for phonemization. This should only be specified when the correct espeak voice cannot be inferred from the kokoro voice ( see MultiLanguage Configuration in the README for more info).", "-eid", false)); args.add_argument(int_arg("--max-tokens", "(OPTIONAL) The max audio tokens or token batches to generate where each represents approximates 11 ms of audio. Only applied to Dia generation. If set to zero as is its default then the default max generation size. Warning values under 15 are not supported.", "-mt", false, &default_max_tokens)); diff --git a/otherarch/ttscpp/src/kokoro_model.h b/otherarch/ttscpp/src/kokoro_model.h index 7ffa9eba6..808f78761 100644 --- a/otherarch/ttscpp/src/kokoro_model.h +++ b/otherarch/ttscpp/src/kokoro_model.h @@ -318,7 +318,7 @@ struct kokoro_duration_context : runner_context { ggml_backend_buffer_free(buf_len_output); } - std::string voice = "af_alloy"; + std::string voice = "af_heart"; struct kokoro_model * model; ggml_backend_buffer_t buf_len_output = nullptr; @@ -396,7 +396,7 @@ struct kokoro_context : runner_context { } } - std::string voice = "af_alloy"; + std::string voice = "af_heart"; struct kokoro_model * model; @@ -442,7 +442,7 @@ struct kokoro_runner : tts_runner { kokoro_duration_runner * drunner; phonemizer * phmzr; - std::string default_voice = "af_alloy"; + std::string default_voice = "af_heart"; void init_build() { tts_runner::init_build(&kctx->buf_compute_meta);