adjust kokoro default voices
Some checks failed
Copilot Setup Steps / copilot-setup-steps (push) Has been cancelled

This commit is contained in:
Concedo 2025-08-22 23:48:29 +08:00
parent 80dabbb689
commit 3867db34bc
3 changed files with 5 additions and 5 deletions

View file

@ -670,7 +670,7 @@ static tts_generation_outputs ttstype_generate_ttscpp(const tts_generation_input
if(detectedarch=="kokoro")
{
vmapper = {"am_echo","af_heart","af_alloy","bm_daniel","bf_isabella"};
vmapper = {"am_echo","af_heart","af_nicole","bm_fable","bf_isabella"};
vpermitted = {"af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica", "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky", "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael", "am_onyx", "am_puck", "am_santa", "bf_alice", "bf_emma", "bf_isabella", "bf_lily", "bm_daniel", "bm_fable", "bm_george", "bm_lewis"};
}
else if(detectedarch=="dia")

View file

@ -39,7 +39,7 @@ int main(int argc, const char ** argv) {
args.add_argument(bool_arg("--no-cross-attn", "(OPTIONAL) Whether to not include cross attention", "-ca"));
args.add_argument(string_arg("--conditional-prompt", "(OPTIONAL) A distinct conditional prompt to use for generating. If none is provided the preencoded prompt is used. '--text-encoder-path' must be set to use conditional generation.", "-cp", false));
args.add_argument(string_arg("--text-encoder-path", "(OPTIONAL) The local path of the text encoder gguf model for conditional generaiton.", "-tep", false));
args.add_argument(string_arg("--voice", "(OPTIONAL) The voice to use to generate the audio. This is only used for models with voice packs.", "-v", false, "af_alloy"));
args.add_argument(string_arg("--voice", "(OPTIONAL) The voice to use to generate the audio. This is only used for models with voice packs.", "-v", false, "af_heart"));
args.add_argument(bool_arg("--vad", "(OPTIONAL) whether to apply voice inactivity detection (VAD) and strip silence form the end of the output (particularly useful for Parler TTS). By default, no VAD is applied.", "-va"));
args.add_argument(string_arg("--espeak-voice-id", "(OPTIONAL) The espeak voice id to use for phonemization. This should only be specified when the correct espeak voice cannot be inferred from the kokoro voice ( see MultiLanguage Configuration in the README for more info).", "-eid", false));
args.add_argument(int_arg("--max-tokens", "(OPTIONAL) The max audio tokens or token batches to generate where each represents approximates 11 ms of audio. Only applied to Dia generation. If set to zero as is its default then the default max generation size. Warning values under 15 are not supported.", "-mt", false, &default_max_tokens));

View file

@ -318,7 +318,7 @@ struct kokoro_duration_context : runner_context {
ggml_backend_buffer_free(buf_len_output);
}
std::string voice = "af_alloy";
std::string voice = "af_heart";
struct kokoro_model * model;
ggml_backend_buffer_t buf_len_output = nullptr;
@ -396,7 +396,7 @@ struct kokoro_context : runner_context {
}
}
std::string voice = "af_alloy";
std::string voice = "af_heart";
struct kokoro_model * model;
@ -442,7 +442,7 @@ struct kokoro_runner : tts_runner {
kokoro_duration_runner * drunner;
phonemizer * phmzr;
std::string default_voice = "af_alloy";
std::string default_voice = "af_heart";
void init_build() {
tts_runner::init_build(&kctx->buf_compute_meta);