mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 19:46:11 +00:00
Could not resolve the audio clicking; experimental click-suppression pass left commented out
This commit is contained in:
parent
ff1d179e21
commit
75c919cfd4
5 changed files with 99 additions and 14 deletions
|
|
@ -19951,6 +19951,7 @@ excursions,ɪkskˈɜɹʒənz
|
|||
excusable,ɪkskjˈuzəbᵊl
|
||||
excused,ɪkskjˈuzd
|
||||
excusing,ɪkskjˈuzɪŋ
|
||||
excuses,ɪkskjˈuzᵻz
|
||||
exec,ɛɡzˈɛk
|
||||
execrable,ˈɛksəkɹəbᵊl
|
||||
execration,ˌɛksəkɹˈAʃən
|
||||
|
|
@ -39067,16 +39068,16 @@ organelles,ˌɔɹɡənˈɛlz
|
|||
organically,ɔɹɡˈænəkᵊli
|
||||
organic,ɔɹɡˈænɪk
|
||||
organics,ɔɹɡˈænɪks
|
||||
organisationally,ˌɔɹɡənəzˈAʃənᵊli
|
||||
organisational,ˌɔɹɡənəzˈAʃənᵊl
|
||||
organisation,ˌɔɹɡənəzˈAʃən
|
||||
organisations,ˌɔɹɡənəzˈAʃənz
|
||||
organised,ˈɔɹɡənˌIzd
|
||||
organise,ˈɔɹɡənˌIz
|
||||
organiser,ˈɔɹɡənˌIzəɹ
|
||||
organisers,ˈɔɹɡənˌIzəɹz
|
||||
organises,ˈɔɹɡənˌIzᵻz
|
||||
organising,ˈɔɹɡənˌIzɪŋ
|
||||
organizationally,ˌɔɹɡənəzˈAʃənᵊli
|
||||
organizational,ˌɔɹɡənəzˈAʃənᵊl
|
||||
organization,ˌɔɹɡənəzˈAʃən
|
||||
organizations,ˌɔɹɡənəzˈAʃənz
|
||||
organized,ˈɔɹɡənˌIzd
|
||||
organize,ˈɔɹɡənˌIz
|
||||
organizer,ˈɔɹɡənˌIzəɹ
|
||||
organizers,ˈɔɹɡənˌIzəɹz
|
||||
organizes,ˈɔɹɡənˌIzᵻz
|
||||
organizing,ˈɔɹɡənˌIzɪŋ
|
||||
organism,ˈɔɹɡənˌɪzəm
|
||||
organisms,ˈɔɹɡənˌɪzəmz
|
||||
organist,ˈɔɹɡənɪst
|
||||
|
|
|
|||
|
|
@ -64,6 +64,85 @@ struct wav_header {
|
|||
uint32_t data_size;
|
||||
};
|
||||
|
||||
// #include <vector>
|
||||
// #include <cstdio>
|
||||
// #include <cmath>
|
||||
|
||||
// static void audio_post_clean(std::vector<float>& data) { // detect low-level noise bursts bracketed by silence and attenuate them
|
||||
// const float silenceThreshold = 1e-5f;
|
||||
// const float noiseThreshold = 1e-3f;
|
||||
// const size_t minSilence = 100; // samples
|
||||
// const size_t noiseSpan = 150; // samples
|
||||
// const size_t minSilence2 = 100; // samples
|
||||
|
||||
// size_t len = data.size();
|
||||
|
||||
// int silencecounterA = 0;
|
||||
// int noisecounterA = 0;
|
||||
// int silencecounterB = 0;
|
||||
// int state = 0; // 0 = finding first silence, 1 = measuring noise, 2 = finding second silence
|
||||
|
||||
// size_t noiseStart = 0;
|
||||
|
||||
// for (size_t i = 0; i < len; ++i) {
|
||||
// float sample = std::fabs(data[i]);
|
||||
|
||||
// if (state == 0) { // finding first silence
|
||||
// if (sample < silenceThreshold) {
|
||||
// silencecounterA++;
|
||||
// } else {
|
||||
// if (silencecounterA >= minSilence) {
|
||||
// state = 1;
|
||||
// noisecounterA = 1;
|
||||
// noiseStart = i;
|
||||
// } else {
|
||||
// silencecounterA = 0;
|
||||
// noisecounterA = 0;
|
||||
// silencecounterB = 0;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// if (state == 1) { // measuring noise span
|
||||
// noisecounterA++;
|
||||
// if(sample>noiseThreshold)
|
||||
// {
|
||||
// state = 0;
|
||||
// silencecounterA = 0;
|
||||
// noisecounterA = 0;
|
||||
// silencecounterB = 0;
|
||||
// }
|
||||
// else if(noisecounterA>noiseSpan)
|
||||
// {
|
||||
// state = 2;
|
||||
// }
|
||||
// }
|
||||
// if (state == 2) { // finding second silence
|
||||
// if (sample < silenceThreshold) {
|
||||
// silencecounterB++;
|
||||
// if (silencecounterB >= minSilence2) {
|
||||
// // full click detected
|
||||
// size_t noiseend = noiseStart + noisecounterA - 1;
|
||||
// //printf("Click detected from %zu to %zu\n", noiseStart, noiseend);
|
||||
// for(size_t j=noiseStart;j<noiseend;++j)
|
||||
// {
|
||||
// data[j] *= 0.01f; //greatly suppress noise
|
||||
// }
|
||||
// // reset to search again
|
||||
// state = 0;
|
||||
// silencecounterA = 0;
|
||||
// noisecounterA = 0;
|
||||
// silencecounterB = 0;
|
||||
// }
|
||||
// } else {
|
||||
// state = 0;
|
||||
// silencecounterA = 0;
|
||||
// noisecounterA = 0;
|
||||
// silencecounterB = 0;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
static std::string save_wav16_base64(const std::vector<float> &data, int sample_rate) {
|
||||
std::ostringstream oss;
|
||||
wav_header header;
|
||||
|
|
@ -740,6 +819,7 @@ static tts_generation_outputs ttstype_generate_ttscpp(const tts_generation_input
|
|||
ttstime = timer_check();
|
||||
printf("\nTTS Generated audio in %.2fs.\n",ttstime);
|
||||
std::vector<float> wavdat = std::vector(response_data.data, response_data.data + response_data.n_outputs);
|
||||
//audio_post_clean(wavdat);
|
||||
last_generated_audio = save_wav16_base64(wavdat, ttscpp_runner->sampling_rate);
|
||||
output.data = last_generated_audio.c_str();
|
||||
output.status = 1;
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ static const std::map<const char, std::string> LETTER_PHONEMES = {
|
|||
{'d', "dˈiː"},
|
||||
{'e', "ˈiː"},
|
||||
{'f', "ˈɛf"},
|
||||
{'j', "dʒˈeɪ"},
|
||||
{'g', "dʒˈi"},
|
||||
{'h', "ˈeɪtʃ"},
|
||||
{'i', "ˈaɪ"},
|
||||
{'j', "dʒˈeɪ"},
|
||||
|
|
|
|||
|
|
@ -1426,11 +1426,13 @@ int kokoro_runner::generate(std::string prompt, struct tts_response * response,
|
|||
prompt = replace_any(prompt, ";:", "--");
|
||||
prompt = replace_any(prompt, "\n", "--");
|
||||
kokoro_str_replace_all(prompt,"’","'");
|
||||
kokoro_str_replace_all(prompt,"Mr. ","Mister ");
|
||||
prompt = std::regex_replace(prompt, std::regex("(\\w)([.!?]) "), "$1$2, ");
|
||||
kokoro_str_replace_all(prompt," - "," -- ");
|
||||
kokoro_str_replace_all(prompt,"he's ","he is ");
|
||||
kokoro_str_replace_all(prompt,"'s ","s ");
|
||||
kokoro_str_replace_all(prompt,"n't ","nt ");
|
||||
kokoro_str_replace_all(prompt,"*"," ");
|
||||
std::string phonemized_prompt = phmzr->text_to_phonemes(prompt);
|
||||
// printf("\nRESULT: %s\n",phonemized_prompt.c_str());
|
||||
|
||||
|
|
|
|||
|
|
@ -893,9 +893,11 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
|
|||
text->size_pop(word.size()+unaccented_size_difference);
|
||||
return true;
|
||||
}
|
||||
} else if (can_be_roman_numeral(word) && is_all_upper(word) && small_english_words.find(to_lower(word)) == small_english_words.end() && handle_roman_numeral(text, output, flags)) {
|
||||
return true;
|
||||
} else if (is_acronym_like(text, word, flags)) {
|
||||
}
|
||||
// else if (can_be_roman_numeral(word) && is_all_upper(word) && small_english_words.find(to_lower(word)) == small_english_words.end() && handle_roman_numeral(text, output, flags)) {
|
||||
// return true;
|
||||
// }
|
||||
else if (is_acronym_like(text, word, flags)) {
|
||||
return handle_acronym(text, word, output, flags);
|
||||
} else if (word.find(".") < word.length()) {
|
||||
bool part_has_accent = false;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue