diff --git a/kcpp_docs.embd b/kcpp_docs.embd
index 77b6c9a35..f8f88fd96 100644
--- a/kcpp_docs.embd
+++ b/kcpp_docs.embd
@@ -616,6 +616,8 @@
"vision": false,
"transcribe":false,
"multiplayer": false,
+ "websearch":false,
+ "tts":false,
},
"schema": {
"$ref": "#/components/schemas/KcppVersion"
@@ -1443,6 +1445,52 @@
]
}
},
+ "/api/extra/tts": {
+ "post": {
+ "description": "Creates text-to-speech audio from input text.",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "example": {
+ "input": "hello world, how are you today?",
+ "voice": "fire",
+ },
+ "schema": {
+ "properties": {
+ "input": {
+ "type": "string",
+ "description": "The text to generate audio for. Try to keep it short."
+ },
+ "voice": {
+ "type": "string",
+ "description": "The voice to use when generating the audio. You can enter anything you like; a unique speaker will be generated."
+ }
+ },
+ "type": "object"
+ }
+ }
+ },
+ "required": true
+ },
+ "responses": {
+ "200": {
+ "content": {
+ "audio/wav": {
+ "schema": {
+ "type": "string",
+ "format": "binary"
+ }
+ }
+ },
+ "description": "Successful request"
+ }
+ },
+ "summary": "Creates text-to-speech audio from input text.",
+ "tags": [
+ "api/extra"
+ ]
+ }
+ },
"/props": {
"get": {
"summary": "Returns the Jinja template stored in the GGUF model, if found.",
@@ -1840,6 +1888,16 @@
"responses": {"default": {"description": ""}}
}
},
+ "/v1/audio/speech": {
+ "post": {
+ "summary": "Generates Text-To-Speech audio from input text. Please refer to OpenAI documentation",
+ "description": "Generates Text-To-Speech audio from input text.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/audio/createSpeech](https://platform.openai.com/docs/api-reference/audio/createSpeech)",
+ "tags": [
+ "v1"
+ ],
+ "responses": {"default": {"description": ""}}
+ }
+ },
},
"servers": [
{
diff --git a/klite.embd b/klite.embd
index 84ae4d91b..7896c073c 100644
--- a/klite.embd
+++ b/klite.embd
@@ -2030,6 +2030,15 @@ Current version indicated by LITEVER below.
.color_orangeurl:focus {
color: #ffedd3;
}
+ .color_grayurl {
+ color: #9e9e9e;
+ }
+ .color_grayurl:hover {
+ color: #9f9f9f;
+ }
+ .color_grayurl:focus {
+ color: #9e9e9e;
+ }
.color_orange {
color: #f7a223;
@@ -2793,7 +2802,8 @@ Current version indicated by LITEVER below.
const koboldcpp_transcribe_endpoint = "/api/extra/transcribe";
const koboldcpp_tokenize_endpoint = "/api/extra/tokencount";
const koboldcpp_perf_endpoint = "/api/extra/perf";
- const koboldcpp_websearch_endpoint = "/api/extra/websearch"
+ const koboldcpp_websearch_endpoint = "/api/extra/websearch";
+ const koboldcpp_tts_endpoint = "/api/extra/tts";
const oai_models_endpoint = "/models";
const oai_submit_endpoint = "/completions";
@@ -2853,6 +2863,7 @@ Current version indicated by LITEVER below.
const XTTS_ID = 1000;
const ALLTALK_ID = 1001;
const OAI_TTS_ID = 1002;
+ const KCPP_TTS_ID = 1003;
const HD_RES_PX = 768;
const NO_HD_RES_PX = 512;
const AVATAR_PX = 384;
@@ -2965,6 +2976,7 @@ Current version indicated by LITEVER below.
var voice_is_processing = false; //currently processing voice?
let voiceprerecorder = null, voicerecorder = null, voice_is_speaking = false, voice_speaking_counter = 0;
let preaudiobuffers = [], preaudioblobs = []; //will store 2 preblobs at a time
+ var koboldcpp_has_tts = false;
var no_escape_html = false;
var timetaken_timestamp = performance.now();
var bg_silence = null;
@@ -3587,7 +3599,7 @@ Current version indicated by LITEVER below.
document.getElementById("lastreq1").innerHTML =
document.getElementById("lastreq2").innerHTML =
document.getElementById("lastreq3").innerHTML =
- `KoboldAI Lite v${LITEVER} Web - Frontend for External API Services`;
+ `KoboldAI Lite v${LITEVER} Web - Frontend for External API Services`;
trigger_abort_controller(); //first trigger sets it up
@@ -5840,6 +5852,10 @@ initializeInstructUIFunctionality();
{
return (custom_kobold_endpoint!="" && koboldcpp_version && koboldcpp_version!="" && compare_version_str(koboldcpp_version, "1.80") >= 0 && koboldcpp_has_websearch);
}
+ function is_using_kcpp_with_tts() //true only if a custom kobold endpoint is set, its reported version string is non-empty and compares >= 1.81, and the koboldcpp_has_tts flag is set
+ {
+ return (custom_kobold_endpoint!="" && koboldcpp_version && koboldcpp_version!="" && compare_version_str(koboldcpp_version, "1.81") >= 0 && koboldcpp_has_tts);
+ }
function is_using_web_lite()
{
return (window.location.hostname.includes("koboldai.net") || window.location.hostname.includes("lostruins.github.io"));
@@ -9207,6 +9223,7 @@ initializeInstructUIFunctionality();
koboldcpp_has_whisper = (data.transcribe?true:false);
koboldcpp_has_multiplayer = (data.multiplayer?true:false);
koboldcpp_has_websearch = (data.websearch?true:false);
+ koboldcpp_has_tts = (data.tts?true:false);
let has_password = (data.protected?true:false);
let has_txt2img = (data.txt2img?true:false);
let no_txt_model = (mdlname=="inactive");
@@ -9315,7 +9332,7 @@ initializeInstructUIFunctionality();
},()=>{
});
}
- else if(localflag && no_txt_model && !has_txt2img && !koboldcpp_has_vision && !koboldcpp_has_whisper)
+ else if(localflag && no_txt_model && !has_txt2img && !koboldcpp_has_vision && !koboldcpp_has_whisper && !koboldcpp_has_tts)
{
msgboxYesNo("This KoboldCpp instance has no models loaded. You can still use the WebUI to edit or view existing stories.
Would you like to connect to an external API service?","No Models Loaded",
()=>{
@@ -10311,6 +10328,8 @@ initializeInstructUIFunctionality();
ttshtml += "";
ttshtml += "";
ttshtml += "";
+ ttshtml += "";
+
if ('speechSynthesis' in window) {
let voices = window.speechSynthesis.getVoices();
console.log("speech synth available: " + voices.length);
@@ -11894,6 +11913,7 @@ initializeInstructUIFunctionality();
document.getElementById("xtts_container").classList.add("hidden");
document.getElementById("oai_tts_container").classList.add("hidden");
document.getElementById("alltalk_specific_controls").classList.add("hidden");
+ document.getElementById("kcpp_tts_container").classList.add("hidden");
const selectedTTS = document.getElementById("ttsselect").value;
@@ -11910,6 +11930,15 @@ initializeInstructUIFunctionality();
else if(selectedTTS == OAI_TTS_ID) {
document.getElementById("oai_tts_container").classList.remove("hidden");
}
+ else if(selectedTTS == KCPP_TTS_ID) {
+ document.getElementById("kcpp_tts_container").classList.remove("hidden");
+ if(is_using_kcpp_with_tts())
+ {
+ document.getElementById("nokcpptts").classList.add("hidden");
+ }else{
+ document.getElementById("nokcpptts").classList.remove("hidden");
+ }
+ }
}
// Fetch RVC voices for AllTalk
@@ -12014,27 +12043,44 @@ initializeInstructUIFunctionality();
}
}
- if(ssval==XTTS_ID || ssval==ALLTALK_ID || ssval==OAI_TTS_ID) //xtts api server
+ if(ssval==XTTS_ID || ssval==ALLTALK_ID || ssval==OAI_TTS_ID || ssval==KCPP_TTS_ID) //xtts api server
{
let is_xtts = (ssval==XTTS_ID);
let is_oai_tts = (ssval==OAI_TTS_ID);
+ let is_kcpp_tts = (ssval==KCPP_TTS_ID);
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
- if(is_oai_tts)
+ if(is_oai_tts || is_kcpp_tts)
{
- let payload =
+ let payload = {};
+ let ttsheaders = {};
+ let sub_endpt = "";
+ if(is_oai_tts)
{
- "model": document.getElementById("oai_tts_model").value,
- "input": text,
- "voice": document.getElementById("oai_tts_voice").value
- };
- let oaiheaders = {
- 'Content-Type': 'application/json',
- 'Authorization': 'Bearer ' + localsettings.saved_oai_tts_key
- };
- fetch(localsettings.saved_oai_tts_url, {
+ sub_endpt = localsettings.saved_oai_tts_url;
+ payload =
+ {
+ "model": document.getElementById("oai_tts_model").value,
+ "input": text,
+ "voice": document.getElementById("oai_tts_voice").value
+ };
+ ttsheaders = {
+ 'Content-Type': 'application/json',
+ 'Authorization': 'Bearer ' + localsettings.saved_oai_tts_key
+ };
+ } else {
+ sub_endpt = apply_proxy_url(custom_kobold_endpoint + koboldcpp_tts_endpoint);
+ payload =
+ {
+ "input": text,
+ "voice": document.getElementById("kcpp_tts_voice").value
+ };
+ ttsheaders = get_kobold_header();
+ }
+
+ fetch(sub_endpt, {
method: 'POST',
- headers: oaiheaders,
+ headers: ttsheaders,
body: JSON.stringify(payload),
})
.then(response => response.arrayBuffer())
@@ -20199,6 +20245,14 @@ initializeInstructUIFunctionality();
TTS Voice |