diff --git a/README.md b/README.md index ec2d1c436..fe082829a 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ when you can't use the precompiled binary directly, we provide an automated buil - Note: Many OSX users have found that the using Accelerate is actually faster than OpenBLAS. To try, you may wish to run with `--noblas` and compare speeds. ### Arch Linux Packages -There are some community made AUR packages (Maintained by @AlpinDale) available: [CUBLAS](https://aur.archlinux.org/packages/koboldcpp-cuda), and [HIPBLAS](https://aur.archlinux.org/packages/koboldcpp-hipblas). They are intended for users with NVIDIA GPUs, and users with a supported AMD GPU. Note that these packages may be outdated, and it's probably better to use official KoboldCpp binaries. +There are some community made AUR packages available: [CUBLAS](https://aur.archlinux.org/packages/koboldcpp-cuda), and [HIPBLAS](https://aur.archlinux.org/packages/koboldcpp-hipblas). They are intended for users with NVIDIA GPUs, and users with a supported AMD GPU. Note that these packages may be outdated, and it's probably better to use official KoboldCpp binaries. ## Compiling on Windows - You're encouraged to use the .exe released, but if you want to compile your binaries from source at Windows, the easiest way is: diff --git a/klite.embd b/klite.embd index 8c0c4162b..0fa20ac52 100644 --- a/klite.embd +++ b/klite.embd @@ -3747,10 +3747,12 @@ Current version: 143 var logitbiasdict = {}; var regexreplace_data = []; var placeholder_tags_data = []; - var voice_typing_enabled = false; //did user toggle on voice typing + var voice_typing_mode = 0; //0=off, 1=on, 2=ptt var koboldcpp_has_whisper = false; //does backend support voice typing var voice_is_recording = false; //currently recording voice? var voice_is_processing = false; //currently processing voice? + let voiceprerecorder = null, voicerecorder = null, voice_is_speaking = false, voice_speaking_counter = 0; + let preaudiobuffers = [], preaudioblobs = []; //will store 2 preblobs at a time const num_regex_rows = 4; var localsettings = { @@ -8996,8 +8998,8 @@ Current version: 143 localsettings.sampler_seed = cleannum(localsettings.sampler_seed, -1, 999999); toggle_invert_colors(); - voice_typing_enabled = (document.getElementById("voice_typing_mode").checked?true:false); - if(voice_typing_enabled && is_using_kcpp_with_whisper()) + voice_typing_mode = document.getElementById("voice_typing_mode").value; + if(voice_typing_mode>0 && is_using_kcpp_with_whisper()) { init_voice_typing(); } @@ -10161,6 +10163,66 @@ Current version: 143 } } + function ptt_start() + { + if(voice_typing_mode>0) + { + voice_is_speaking = true; + ++voice_speaking_counter; + if(ready_to_record()) + { + if (voicerecorder.state === "inactive") { + if (voiceprerecorder.state !== "inactive") { + voiceprerecorder.stop(); + } + voicerecorder.start(); + } + voice_is_recording = true; + update_submit_button(false); + } + } + } + function ptt_end() + { + if(voice_typing_mode>0) + { + voice_is_speaking = false; + let check_speak_counter = voice_speaking_counter; + setTimeout(() => { + if (voice_is_recording && !voice_is_speaking && voice_speaking_counter == check_speak_counter) { + //generate prerecorder blobs (prebuffer 1sec) + preaudioblobs = []; + if(voice_typing_mode==1) + { + for(let i=0;i0 && is_using_kcpp_with_whisper() && !document.getElementById("btnsend").disabled && !voice_is_processing && !voice_is_recording && isVoiceInputConfigured && !currentlySpeaking && !xtts_is_playing && !is_popup_open()); @@ -12523,7 +12585,7 @@ Current version: 143 if(navigator.mediaDevices==null) { msgbox("Cannot initialize microphone. If you're using a non-localhost URL, it needs to be served over HTTPS!","Error Starting Microphone"); - voice_typing_enabled = document.getElementById("voice_typing_mode").checked = false; + voice_typing_mode = document.getElementById("voice_typing_mode").checked = 0; return; } if (isVoiceInputConfigured) { @@ -12609,7 +12671,6 @@ Current version: 143 return tmp; } - let prerecorder, preaudiobuffers = [], preaudioblobs = []; //will store 2 preblobs at a time let onRecordingReady = function (e) { let completeRecording = new Blob([e.data], { type: 'audio/webm' }); let audiodatareader = new window.FileReader(); @@ -12617,7 +12678,7 @@ Current version: 143 if(preaudioblobs.length<2) { audioBlobToDecodedAudioBuffer(completeRecording,(buffer)=>{ - let wavblob = audioBufferToWavBlob(finalbuf); + let wavblob = audioBufferToWavBlob(buffer); audiodatareader.readAsDataURL(wavblob); }); } else { @@ -12640,13 +12701,12 @@ Current version: 143 } - let recorder, is_speaking = false, speaking_counter = 0; // get audio stream from user's mic navigator.mediaDevices.getUserMedia({ audio: true }).then(function (stream) { - prerecorder = new MediaRecorder(stream); - prerecorder.addEventListener('dataavailable', (ev)=>{ + voiceprerecorder = new MediaRecorder(stream); + voiceprerecorder.addEventListener('dataavailable', (ev)=>{ preaudiobuffers.push(ev.data); if(preaudiobuffers.length>2) { @@ -12654,55 +12714,33 @@ Current version: 143 } }); setInterval(()=>{ - if (prerecorder.state !== "inactive") { - prerecorder.stop(); + if (voiceprerecorder.state !== "inactive") { + voiceprerecorder.stop(); } - if(ready_to_record()){ - prerecorder.start(); + if(ready_to_record() && voice_typing_mode==1){ //only voice detect needs it + voiceprerecorder.start(); } }, 500); - recorder = new MediaRecorder(stream); - recorder.addEventListener('dataavailable', onRecordingReady); + voicerecorder = new MediaRecorder(stream); + voicerecorder.addEventListener('dataavailable', onRecordingReady); window.AudioContext = window.AudioContext || window.webkitAudioContext; let audioContext = new AudioContext({ sampleRate: 16000 }); let source = audioContext.createMediaStreamSource(stream); let options = { source: source, voice_stop: function () { - is_speaking = false; - let check_speak_counter = speaking_counter; - console.log("speech stopped"); - setTimeout(() => { - if (voice_is_recording && !is_speaking && speaking_counter == check_speak_counter) { - //generate prerecorder blobs (prebuffer 1sec) - preaudioblobs = []; - for(let i=0;i0 && is_using_kcpp_with_whisper()) { if (voice_is_processing) { document.getElementById("chat_msg_send_btn").classList.add("showmicoff"); @@ -13259,7 +13298,7 @@ Current version: 143 document.getElementById("btnsend").innerHTML = "
Record"; } else if (ready_to_record()) { document.getElementById("chat_msg_send_btn").classList.add("showmic"); - document.getElementById("btnsend").innerHTML = "
Standby"; + document.getElementById("btnsend").innerHTML = "
"+(voice_typing_mode==1?"Standby":"PTT")+""; } else { document.getElementById("chat_msg_send_btn").classList.add("showmicoff"); document.getElementById("btnsend").innerHTML = "
Busy"; @@ -15227,7 +15266,7 @@ Current version: 143
+ onclick="submit_generation_button(false)" onmousedown="ptt_start()" onmouseup="ptt_end()">Loading
@@ -15261,7 +15300,7 @@ Current version: 143 - + @@ -16103,9 +16142,13 @@ Current version: 143
-
Voice Typing Mode ?Requires KoboldCpp with Whisper model loaded. Enables Speech-To-Text voice input. Automatically submits text after input.
- +
Speech Control ?Requires KoboldCpp with Whisper model loaded. Enables Speech-To-Text voice input. Automatically listens for speech in 'On' mode (Voice Detection), or use Push-To-Talk (PTT).
+