mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-15 11:29:43 +00:00
wip, adding IPA for kokoro
This commit is contained in:
parent
3138a151c2
commit
3f621be7dd
3 changed files with 65185 additions and 0 deletions
65118
kokoro_ipa.embd
Normal file
65118
kokoro_ipa.embd
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1387,6 +1387,24 @@ std::vector<std::vector<uint32_t>> kokoro_runner::tokenize_chunks(std::vector<st
|
||||||
return chunks;
|
return chunks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//kcpp hacked a quick replace fn
|
||||||
|
// Replaces every non-overlapping occurrence of `search` inside `s` with `replace`,
// scanning left to right. Inserted replacement text is never rescanned, and an
// empty `search` leaves `s` untouched.
static void kokoro_str_replace_all(std::string & s, const std::string & search, const std::string & replace) {
    const size_t needle_len = search.size();
    if (needle_len == 0) {
        return;
    }

    // Assemble the result in a separate buffer so `s` is only overwritten once at the end.
    std::string result;
    result.reserve(s.size());

    size_t cursor = 0;
    for (size_t hit = s.find(search, cursor); hit != std::string::npos; hit = s.find(search, cursor)) {
        result.append(s, cursor, hit - cursor); // untouched text before the match
        result += replace;
        cursor = hit + needle_len;              // resume just past the matched text
    }

    // Copy whatever follows the final match (or the whole string when nothing matched).
    result.append(s, cursor, std::string::npos);
    s = std::move(result);
}
|
||||||
|
|
||||||
int kokoro_runner::generate(std::string prompt, struct tts_response * response, std::string voice, std::string voice_code) {
|
int kokoro_runner::generate(std::string prompt, struct tts_response * response, std::string voice, std::string voice_code) {
|
||||||
if (model->voices.find(voice) == model->voices.end()) {
|
if (model->voices.find(voice) == model->voices.end()) {
|
||||||
fprintf(stdout,"\nFailed to find Kokoro voice '%s' aborting.\n", voice.c_str());
|
fprintf(stdout,"\nFailed to find Kokoro voice '%s' aborting.\n", voice.c_str());
|
||||||
|
@ -1406,7 +1424,10 @@ int kokoro_runner::generate(std::string prompt, struct tts_response * response,
|
||||||
// We preserve the other punctuation for cleaner chunking pre-tokenization
|
// We preserve the other punctuation for cleaner chunking pre-tokenization
|
||||||
prompt = replace_any(prompt, ",;:", "--");
|
prompt = replace_any(prompt, ",;:", "--");
|
||||||
prompt = replace_any(prompt, "\n", " ");
|
prompt = replace_any(prompt, "\n", " ");
|
||||||
|
kokoro_str_replace_all(prompt," - "," -- ");
|
||||||
|
kokoro_str_replace_all(prompt,"'s ","s ");
|
||||||
std::string phonemized_prompt = phmzr->text_to_phonemes(prompt);
|
std::string phonemized_prompt = phmzr->text_to_phonemes(prompt);
|
||||||
|
// printf("\nRESULT: %s\n",phonemized_prompt.c_str());
|
||||||
|
|
||||||
// Kokoro uses a utf-8 single character tokenizer so if the size of the prompt is smaller than the max context length without the
|
// Kokoro uses a utf-8 single character tokenizer so if the size of the prompt is smaller than the max context length without the
|
||||||
// beginning of sentence and end of sentence tokens then we can compute it all at once.
|
// beginning of sentence and end of sentence tokens then we can compute it all at once.
|
||||||
|
|
|
@ -798,9 +798,53 @@ bool phonemizer::handle_phonetic(corpus* text, std::string word, std::string* ou
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// File-local lookup table of string -> string entries; written by populate_kokoro_ipa_map() and queried by found_word_to_ipa().
static std::unordered_map<std::string, std::string> kokoro_ipa_map;
|
||||||
|
void populate_kokoro_ipa_map(std::string executable_path)
|
||||||
|
{
|
||||||
|
std::string line;
|
||||||
|
auto filepath = executable_path + "kokoro_ipa.embd";
|
||||||
|
printf("\nReading Kokoro IPA from %s",filepath.c_str());
|
||||||
|
std::ifstream myfile(filepath);
|
||||||
|
if (myfile.is_open())
|
||||||
|
{
|
||||||
|
while (myfile.good())
|
||||||
|
{
|
||||||
|
getline(myfile, line);
|
||||||
|
auto parts = split(line, ",");
|
||||||
|
if(parts.size()==2)
|
||||||
|
{
|
||||||
|
kokoro_ipa_map[parts[0]] = parts[1];
|
||||||
|
} else {
|
||||||
|
printf("\nError reading line in Kokoro IPA!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
myfile.close();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf("\nUnable to open Kokoro IPA file");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::string found_word_to_ipa(std::string input)
|
||||||
|
{
|
||||||
|
auto it = kokoro_ipa_map.find(input);
|
||||||
|
if (it != kokoro_ipa_map.end()) {
|
||||||
|
return it->second; //found
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
bool phonemizer::process_word(corpus* text, std::string* output, std::string word, conditions* flags, bool has_accent) {
|
bool phonemizer::process_word(corpus* text, std::string* output, std::string word, conditions* flags, bool has_accent) {
|
||||||
dictionary_response* response;
|
dictionary_response* response;
|
||||||
size_t unaccented_size_difference = 0;
|
size_t unaccented_size_difference = 0;
|
||||||
|
|
||||||
|
std::string foundstr = found_word_to_ipa(word);
|
||||||
|
if(foundstr!="")
|
||||||
|
{
|
||||||
|
output->append(foundstr);
|
||||||
|
text->size_pop(word.size());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
if (has_accent) {
|
if (has_accent) {
|
||||||
response = dict->lookup(text, word, flags);
|
response = dict->lookup(text, word, flags);
|
||||||
if (!response->is_successful()) {
|
if (!response->is_successful()) {
|
||||||
|
@ -813,6 +857,8 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
|
||||||
response = dict->lookup(text, word, flags);
|
response = dict->lookup(text, word, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//printf("\nSUCCESS: %d, word:%s, result:%s\n",response->is_successful(),word.c_str(),response->value.c_str());
|
||||||
|
|
||||||
if (response->is_successful()) {
|
if (response->is_successful()) {
|
||||||
if (flags->was_word && output->back() != ' ' && !flags->hyphenated) {
|
if (flags->was_word && output->back() != ' ' && !flags->hyphenated) {
|
||||||
output->append(" ");
|
output->append(" ");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue