handle contractions correctly, bump defaults

This commit is contained in:
Concedo 2025-08-18 22:33:44 +08:00
parent 2abe11071b
commit 9fb0611115
4 changed files with 74 additions and 15 deletions

View file

@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
-->
<script id="init-config">
const LITEVER = 273;
const LITEVER = 274;
const urlParams = new URLSearchParams(window.location.search);
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@ -3348,6 +3348,7 @@ Current version indicated by LITEVER below.
notify_on: false,
narrate_both_sides: false,
narrate_only_dialog: false,
embed_narrations: false,
voice_end_delay: 300,
voice_suppress_nonspeech: false,
voice_langcode: "auto",
@ -3431,7 +3432,7 @@ Current version indicated by LITEVER below.
second_ep_url:"",
max_context_length: (localflag?4096:3072),
max_length: (localflag?512:256),
max_length: (localflag?640:400),
last_maxctx: 0,
auto_ctxlen: true,
auto_genamt: true,
@ -12941,6 +12942,7 @@ Current version indicated by LITEVER below.
document.getElementById("no_escape_html").checked = no_escape_html;
document.getElementById("narrate_both_sides").checked = localsettings.narrate_both_sides;
document.getElementById("narrate_only_dialog").checked = localsettings.narrate_only_dialog;
document.getElementById("embed_narrations").checked = localsettings.embed_narrations;
document.getElementById("tts_speed").value = localsettings.tts_speed;
document.getElementById("voice_end_delay").value = localsettings.voice_end_delay;
document.getElementById("voice_suppress_nonspeech").checked = localsettings.voice_suppress_nonspeech;
@ -13473,6 +13475,7 @@ Current version indicated by LITEVER below.
no_escape_html = (document.getElementById("no_escape_html").checked?true:false);
localsettings.narrate_both_sides = (document.getElementById("narrate_both_sides").checked?true:false);
localsettings.narrate_only_dialog = (document.getElementById("narrate_only_dialog").checked?true:false);
localsettings.embed_narrations = (document.getElementById("embed_narrations").checked?true:false);
localsettings.tts_speed = document.getElementById("tts_speed").value;
localsettings.voice_end_delay = document.getElementById("voice_end_delay").value;
localsettings.voice_suppress_nonspeech = (document.getElementById("voice_suppress_nonspeech").checked?true:false);
@ -15448,6 +15451,16 @@ Current version indicated by LITEVER below.
a.download = "audio.wav";
setTimeout(function(){a.click()},20);
}
//Wraps raw TTS audio bytes in an 'audio/wav' Blob, converts it to a data URL,
//and passes that to self_upload_audio under the name "tts_audio".
//arrayBufferData: the audio payload to embed.
//The conversion is asynchronous: self_upload_audio is invoked from the FileReader load callback.
function tts_embed_audio(arrayBufferData)
{
	if(!arrayBufferData)
	{
		//nothing to embed; a Blob built from null/undefined would hold the literal text instead of audio
		return;
	}
	var file = new Blob([arrayBufferData], { type: 'audio/wav' });
	var reader = new FileReader();
	reader.onload = function() {
		var base64Audio = this.result;
		self_upload_audio(base64Audio, "tts_audio");
	};
	reader.onerror = function() {
		//report read failures instead of silently dropping the narration
		console.log("TTS Embed Error: " + reader.error);
	};
	reader.readAsDataURL(file);
}
function tts_speak(text, speech_synth_override=null, do_download=false)
{
@ -15486,6 +15499,12 @@ Current version indicated by LITEVER below.
}
}
let do_embed_tts = false;
if(!do_download && speech_synth_override==null) //dont use it for tests
{
do_embed_tts = localsettings.embed_narrations;
}
if(ssval==XTTS_ID || ssval==ALLTALK_ID || ssval==OAI_TTS_ID || ssval==KCPP_TTS_ID || ssval==POLLINATIONS_TTS_ID) //xtts api server
{
let is_xtts = (ssval==XTTS_ID);
@ -15544,6 +15563,10 @@ Current version indicated by LITEVER below.
{
tts_download(audiofile_ref);
}
if(do_embed_tts)
{
tts_embed_audio(audiofile_ref);
}
const playSound = audioContext.createBufferSource();
playSound.buffer = decodedData;
playSound.connect(audioContext.destination);
@ -15586,6 +15609,10 @@ Current version indicated by LITEVER below.
{
tts_download(audiofile_ref);
}
if(do_embed_tts)
{
tts_embed_audio(audiofile_ref);
}
const playSound = audioContext.createBufferSource();
playSound.buffer = decodedData;
playSound.connect(audioContext.destination);
@ -15629,6 +15656,10 @@ Current version indicated by LITEVER below.
{
tts_download(audiofile_ref);
}
if(do_embed_tts)
{
tts_embed_audio(audiofile_ref);
}
const playSound = audioContext.createBufferSource();
playSound.buffer = decodedData;
playSound.connect(audioContext.destination);
@ -15692,6 +15723,10 @@ Current version indicated by LITEVER below.
{
tts_download(audiofile_ref);
}
if(do_embed_tts)
{
tts_embed_audio(audiofile_ref);
}
playDecodedAllTalkData(decodedData);
})
.catch((error) => {
@ -15743,6 +15778,10 @@ Current version indicated by LITEVER below.
{
tts_download(audiofile_ref);
}
if(do_embed_tts)
{
tts_embed_audio(audiofile_ref);
}
playDecodedAllTalkData(decodedData);
})
.catch((error) => {
@ -15773,6 +15812,10 @@ Current version indicated by LITEVER below.
{
tts_download(audiofile_ref);
}
if(do_embed_tts)
{
tts_embed_audio(audiofile_ref);
}
playDecodedAllTalkData(decodedData);
}).catch((error) => {
console.log("AllTalk v1 Speak Error: " + error);
@ -20297,13 +20340,20 @@ Current version indicated by LITEVER below.
if(elements && elements.length>0)
{
//check if we are on an instruct boundary
let onboundary = false;
if(gametext_arr.length>0 && gametext_arr[gametext_arr.length-1].trim().endsWith("{{[OUTPUT]}}"))
{
onboundary = true;
}
elements.forEach(function (element) {
let temp_stream = synchro_pending_stream;
// let codeblockcount = (temp_stream.match(/```/g) || []).length;
// if(codeblockcount>0 && codeblockcount%2!=0)
// {
// temp_stream += "```"; //force end code block
// }
if (onboundary) {
let codeblockcount = (temp_stream.match(/```/g) || []).length;
if (codeblockcount > 0 && codeblockcount % 2 != 0) {
temp_stream += "```"; //force end code block
}
}
let pend = escape_html(pending_context_preinjection) + format_streaming_text(escape_html(temp_stream));
element.innerHTML = pend;
});
@ -25331,6 +25381,10 @@ Current version indicated by LITEVER below.
<div class="justifyleft settingsmall" title="If unchecked, only speak AI replies, not other text.">Narrate Only Dialog </div>
<input title="Narrate Only Dialog" type="checkbox" id="narrate_only_dialog" style="margin:0px 0px 0px auto;">
</div>
<!-- Checkbox bound to localsettings.embed_narrations (Save Narrations) -->
<div class="settinglabel">
<div class="justifyleft settingsmall" title="Try to save and embed TTS narration files into story if possible. Does not work for internal browser TTS. Not recommended due to large size.">Save Narrations </div>
<input title="Save Narrations" type="checkbox" id="embed_narrations" style="margin:0px 0px 0px auto;">
</div>
<div class="inlinelabel" style="font-size: 11px;">
<div class="justifyleft">Browser TTS Speed: </div>
<input title="Browser Narration Speed" type="text" inputmode="decimal" value="1" id="tts_speed" style="width:40px">

View file

@ -4524,7 +4524,7 @@ def show_gui():
chatcompletionsadapter_var = ctk.StringVar(value="AutoGuess")
moeexperts_var = ctk.StringVar(value=str(-1))
moecpu_var = ctk.StringVar(value=str(0))
defaultgenamt_var = ctk.StringVar(value=str(512))
defaultgenamt_var = ctk.StringVar(value=str(640))
nobostoken_var = ctk.IntVar(value=0)
override_kv_var = ctk.StringVar(value="")
override_tensors_var = ctk.StringVar(value="")
@ -5512,7 +5512,7 @@ def show_gui():
args.overridenativecontext = 0
args.moeexperts = int(moeexperts_var.get()) if moeexperts_var.get()!="" else -1
args.moecpu = int(moecpu_var.get()) if moecpu_var.get()!="" else 0
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 512
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 640
args.nobostoken = (nobostoken_var.get()==1)
args.enableguidance = (enableguidance_var.get()==1)
args.overridekv = None if override_kv_var.get() == "" else override_kv_var.get()
@ -7573,7 +7573,7 @@ if __name__ == '__main__':
advparser.add_argument("--nomodel", help="Allows you to launch the GUI alone, without selecting any model.", action='store_true')
advparser.add_argument("--moeexperts", metavar=('[num of experts]'), help="How many experts to use for MoE models (default=follow gguf)", type=int, default=-1)
advparser.add_argument("--moecpu", metavar=('[layers affected]'), help="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. If no value is provided, applies to all layers.", nargs='?', const=999, type=int, default=0)
advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,8192), default=512)
advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,8192), default=640)
advparser.add_argument("--nobostoken", help="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.", action='store_true')
advparser.add_argument("--enableguidance", help="Enables the use of Classifier-Free-Guidance, which allows the use of negative prompts. Has performance and memory impact.", action='store_true')
advparser.add_argument("--maxrequestsize", metavar=('[size in MB]'), help="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.", type=int, default=32)

View file

@ -1425,8 +1425,9 @@ int kokoro_runner::generate(std::string prompt, struct tts_response * response,
prompt = replace_any(prompt, ",;:", "--");
prompt = replace_any(prompt, "\n", " ");
kokoro_str_replace_all(prompt," - "," -- ");
kokoro_str_replace_all(prompt,"he's ","he is ");
kokoro_str_replace_all(prompt,"'s ","s ");
kokoro_str_replace_all(prompt,"wasn't ","wasnt ");
kokoro_str_replace_all(prompt,"n't ","nt ");
std::string phonemized_prompt = phmzr->text_to_phonemes(prompt);
// printf("\nRESULT: %s\n",phonemized_prompt.c_str());

View file

@ -851,12 +851,16 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
size_t unaccented_size_difference = 0;
std::string foundstr = found_word_to_ipa(word);
if(foundstr!="")
if(foundstr!="") //do not use if its part of a contracted word
{
std::string detected = text->next(word.size()+1);
if(detected.back()!='\'')
{
output->append(foundstr);
text->size_pop(word.size());
return true;
}
}
if (has_accent) {
response = dict->lookup(text, word, flags);