From 91b6e29af38afc0660a3a2ae4cfcec3017ea939b Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 9 Jan 2025 23:28:52 +0800 Subject: [PATCH] added multilingual support for whisper --- expose.h | 1 + kcpp_docs.embd | 14 ++++++++++++++ klite.embd | 17 ++++++++++++++--- koboldcpp.py | 4 ++++ otherarch/whispercpp/whisper.cpp | 16 +++++++++++++--- otherarch/whispercpp/whisper_adapter.cpp | 5 +++-- 6 files changed, 49 insertions(+), 8 deletions(-) diff --git a/expose.h b/expose.h index a9bc274e5..a96bdda3b 100644 --- a/expose.h +++ b/expose.h @@ -192,6 +192,7 @@ struct whisper_generation_inputs const char * prompt = nullptr; const char * audio_data = nullptr; const bool suppress_non_speech = false; + const char * langcode = nullptr; const bool quiet = false; }; struct whisper_generation_outputs diff --git a/kcpp_docs.embd b/kcpp_docs.embd index 941ab6a69..77b6c9a35 100644 --- a/kcpp_docs.embd +++ b/kcpp_docs.embd @@ -1344,6 +1344,8 @@ "application/json": { "example": { "prompt": "", + "suppress_non_speech" : false, + "langcode": "en", "audio_data": "base64_wav_data", }, "schema": { @@ -1351,6 +1353,18 @@ "audio_data": { "type": "string", "description": "Base64 respresentation of a 16-bit 16kHz wave file to be transcribed to text." + }, + "prompt": { + "type": "string", + "description": "Prompt to steer the transcription." + }, + "langcode": { + "type": "string", + "description": "Two letter language code, or use auto to autodetect." + }, + "suppress_non_speech": { + "type": "boolean", + "description": "Prevent noise tokens, always generate words for speech." } }, "type": "object" diff --git a/klite.embd b/klite.embd index c8f1ebff5..84ae4d91b 100644 --- a/klite.embd +++ b/klite.embd @@ -12,7 +12,7 @@ Current version indicated by LITEVER below. -->