mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
custom voice handling
This commit is contained in:
parent
67ef5e6c02
commit
2abe11071b
3 changed files with 31 additions and 17 deletions
1
expose.h
1
expose.h
|
@ -247,6 +247,7 @@ struct tts_generation_inputs
|
||||||
const char * prompt = nullptr;
|
const char * prompt = nullptr;
|
||||||
const int speaker_seed = 0;
|
const int speaker_seed = 0;
|
||||||
const int audio_seed = 0;
|
const int audio_seed = 0;
|
||||||
|
const char * custom_speaker_voice = "";
|
||||||
const char * custom_speaker_text = "";
|
const char * custom_speaker_text = "";
|
||||||
const char * custom_speaker_data = "";
|
const char * custom_speaker_data = "";
|
||||||
};
|
};
|
||||||
|
|
|
@ -352,6 +352,7 @@ class tts_generation_inputs(ctypes.Structure):
|
||||||
_fields_ = [("prompt", ctypes.c_char_p),
|
_fields_ = [("prompt", ctypes.c_char_p),
|
||||||
("speaker_seed", ctypes.c_int),
|
("speaker_seed", ctypes.c_int),
|
||||||
("audio_seed", ctypes.c_int),
|
("audio_seed", ctypes.c_int),
|
||||||
|
("custom_speaker_voice", ctypes.c_char_p),
|
||||||
("custom_speaker_text", ctypes.c_char_p),
|
("custom_speaker_text", ctypes.c_char_p),
|
||||||
("custom_speaker_data", ctypes.c_char_p)]
|
("custom_speaker_data", ctypes.c_char_p)]
|
||||||
|
|
||||||
|
@ -1880,6 +1881,7 @@ def tts_generate(genparams):
|
||||||
else:
|
else:
|
||||||
voice = simple_lcg_hash(voicestr.strip()) if voicestr else 1
|
voice = simple_lcg_hash(voicestr.strip()) if voicestr else 1
|
||||||
inputs = tts_generation_inputs()
|
inputs = tts_generation_inputs()
|
||||||
|
inputs.custom_speaker_voice = normalized_voice.encode("UTF-8")
|
||||||
inputs.prompt = prompt.encode("UTF-8")
|
inputs.prompt = prompt.encode("UTF-8")
|
||||||
inputs.speaker_seed = voice
|
inputs.speaker_seed = voice
|
||||||
aseed = -1
|
aseed = -1
|
||||||
|
|
|
@ -502,6 +502,7 @@ static int tts_max_len = 4096;
|
||||||
static bool is_ttscpp_file = false;
|
static bool is_ttscpp_file = false;
|
||||||
static generation_configuration * ttscpp_config = nullptr;
|
static generation_configuration * ttscpp_config = nullptr;
|
||||||
static struct tts_runner * ttscpp_runner = nullptr;
|
static struct tts_runner * ttscpp_runner = nullptr;
|
||||||
|
static std::string detectedarch = "";
|
||||||
|
|
||||||
int total_tts_gens = 0;
|
int total_tts_gens = 0;
|
||||||
static std::string tts_executable_path = "";
|
static std::string tts_executable_path = "";
|
||||||
|
@ -540,7 +541,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
|
||||||
|
|
||||||
std::string modelfile_ttc = inputs.ttc_model_filename;
|
std::string modelfile_ttc = inputs.ttc_model_filename;
|
||||||
std::string modelfile_cts = inputs.cts_model_filename;
|
std::string modelfile_cts = inputs.cts_model_filename;
|
||||||
std::string detectedarch = gguf_get_model_arch(modelfile_ttc);
|
detectedarch = gguf_get_model_arch(modelfile_ttc);
|
||||||
|
|
||||||
is_ttscpp_file = false;
|
is_ttscpp_file = false;
|
||||||
if (detectedarch!="" && SUPPORTED_ARCHITECTURES.find(detectedarch) != SUPPORTED_ARCHITECTURES.end()) {
|
if (detectedarch!="" && SUPPORTED_ARCHITECTURES.find(detectedarch) != SUPPORTED_ARCHITECTURES.end()) {
|
||||||
|
@ -663,24 +664,34 @@ static tts_generation_outputs ttstype_generate_ttscpp(const tts_generation_input
|
||||||
std::string prompt = inputs.prompt;
|
std::string prompt = inputs.prompt;
|
||||||
double ttstime = 0;
|
double ttstime = 0;
|
||||||
timer_start();
|
timer_start();
|
||||||
switch(speaker_seed)
|
|
||||||
|
std::vector<std::string> vmapper = {};
|
||||||
|
std::vector<std::string> vpermitted = {};
|
||||||
|
|
||||||
|
if(detectedarch=="kokoro")
|
||||||
{
|
{
|
||||||
case 1:
|
vmapper = {"am_echo","af_heart","af_alloy","bm_daniel","bf_isabella"};
|
||||||
voiceused = "am_echo";
|
vpermitted = {"af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica", "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky", "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael", "am_onyx", "am_puck", "am_santa", "bf_alice", "bf_emma", "bf_isabella", "bf_lily", "bm_daniel", "bm_fable", "bm_george", "bm_lewis"};
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
voiceused = "af_alloy";
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
voiceused = "af_jessica";
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
voiceused = "bm_daniel";
|
|
||||||
break;
|
|
||||||
case 5:
|
|
||||||
voiceused = "bf_isabella";
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
else if(detectedarch=="dia")
|
||||||
|
{
|
||||||
|
vmapper = {"zoe", "zac", "jess", "leo", "mia"};
|
||||||
|
vpermitted = {"zoe", "zac","jess", "leo", "mia", "julia", "leah"};
|
||||||
|
}
|
||||||
|
|
||||||
|
if(speaker_seed>=1 && speaker_seed<=5 && vmapper.size()>=5)
|
||||||
|
{
|
||||||
|
voiceused = vmapper[speaker_seed-1];
|
||||||
|
}
|
||||||
|
else if(vpermitted.size()>0)
|
||||||
|
{
|
||||||
|
//if we can match the voice, use it
|
||||||
|
const std::string cspeaker = inputs.custom_speaker_voice;
|
||||||
|
if (std::find(vpermitted.begin(), vpermitted.end(), cspeaker) != vpermitted.end()) {
|
||||||
|
voiceused = cspeaker;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(ttsdebugmode==1 && !tts_is_quiet)
|
if(ttsdebugmode==1 && !tts_is_quiet)
|
||||||
{
|
{
|
||||||
printf("\nUsing Speaker ID: %d, Voice: %s", speaker_seed, voiceused.c_str());
|
printf("\nUsing Speaker ID: %d, Voice: %s", speaker_seed, voiceused.c_str());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue