Fixed some GGUFv1 loading bugs, long-overdue compilation cleanup, integrated TTS

TTS is functional (+6 squashed commits)

Squashed commits:

[22396311] wip tts

[3a883027] tts not yet working

[0dcfab0e] fix silly bug

[a378d9ef] some long overdue cleanup

[fc5a6fb5] Wip tts

[39f50497] wip TTS integration
Concedo 2025-01-12 16:33:02 +08:00
parent 12cdcf0abe
commit b3de1598e7
17 changed files with 1175 additions and 271 deletions


@@ -53,6 +53,7 @@ fullsdmodelpath = "" #if empty, it's not initialized
mmprojpath = "" #if empty, it's not initialized
password = "" #if empty, no auth key required
fullwhispermodelpath = "" #if empty, it's not initialized
ttsmodelpath = "" #if empty, not initialized
maxctx = 4096
maxhordectx = 4096
maxhordelen = 400
@@ -281,6 +282,26 @@ class whisper_generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int),
("data", ctypes.c_char_p)]
class tts_load_model_inputs(ctypes.Structure):
_fields_ = [("ttc_model_filename", ctypes.c_char_p),
("cts_model_filename", ctypes.c_char_p),
("executable_path", ctypes.c_char_p),
("clblast_info", ctypes.c_int),
("cublas_info", ctypes.c_int),
("vulkan_info", ctypes.c_char_p),
("gpulayers", ctypes.c_int),
("debugmode", ctypes.c_int)]
class tts_generation_inputs(ctypes.Structure):
_fields_ = [("prompt", ctypes.c_char_p),
("speaker_seed", ctypes.c_int),
("audio_seed", ctypes.c_int),
("quiet", ctypes.c_bool)]
class tts_generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int),
("data", ctypes.c_char_p)]
def getdirpath():
return os.path.dirname(os.path.realpath(__file__))
def getabspath():
@@ -440,6 +461,10 @@ def init_library():
handle.whisper_load_model.restype = ctypes.c_bool
handle.whisper_generate.argtypes = [whisper_generation_inputs]
handle.whisper_generate.restype = whisper_generation_outputs
handle.tts_load_model.argtypes = [tts_load_model_inputs]
handle.tts_load_model.restype = ctypes.c_bool
handle.tts_generate.argtypes = [tts_generation_inputs]
handle.tts_generate.restype = tts_generation_outputs
handle.last_logprobs.restype = last_logprobs_outputs
handle.detokenize.argtypes = [token_count_outputs]
handle.detokenize.restype = ctypes.c_char_p
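A note on these bindings: ctypes passes the new structures by value, so the _fields_ order and types declared above must mirror the C-side structs exactly, and restype must be set before the first call (ctypes otherwise assumes a C int return). A minimal self-contained illustration of the same pattern — the library and function names here are hypothetical:

    import ctypes

    class point(ctypes.Structure):
        # field order and types must match the C struct exactly
        _fields_ = [("x", ctypes.c_int), ("y", ctypes.c_int)]

    lib = ctypes.CDLL("./libdemo.so")    # hypothetical shared library
    lib.norm2.argtypes = [point]         # struct passed by value
    lib.norm2.restype = ctypes.c_double  # declare before calling
    print(lib.norm2(point(3, 4)))        # -> 5.0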
@@ -577,9 +602,13 @@ def utfprint(str, importance = 2): #0 = only debugmode, 1 = except quiet, 2 = al
maxlen = 32000
if args.debugmode >= 1:
maxlen = 64000
strlength = len(str)
if strlength > maxlen: #limit max output len
str = str[:maxlen] + f"... (+{strlength-maxlen} chars)"
try:
strlength = len(str)
if strlength > maxlen: #limit max output len
str = str[:maxlen] + f"... (+{strlength-maxlen} chars)"
except Exception:
pass
try:
print(str)
except UnicodeEncodeError:
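A note on the guard added above: later in this diff utfprint is invoked with a non-string argument (utfprint(ex, 0) in the new TTS handler), and len() on an exception object raises TypeError, so the try/except lets such values skip truncation and still reach the plain print(str) call. A one-line illustration:

    len(ValueError("boom"))  # TypeError: object of type 'ValueError' has no len()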
@@ -647,13 +676,14 @@ def read_gguf_metadata(file_path):
except Exception:
return None
def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath):
def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath,ttsmodelpath):
global modelfile_extracted_meta
modelfile_extracted_meta = None
sdfsize = 0
whisperfsize = 0
mmprojsize = 0
draftmodelsize = 0
ttsmodelsize = 0
if sdfilepath and os.path.exists(sdfilepath):
sdfsize = os.path.getsize(sdfilepath)
if whisperfilepath and os.path.exists(whisperfilepath):
@@ -662,12 +692,14 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,
mmprojsize = os.path.getsize(mmprojfilepath)
if draftmodelpath and os.path.exists(draftmodelpath):
draftmodelsize = os.path.getsize(draftmodelpath)
if ttsmodelpath and os.path.exists(ttsmodelpath):
ttsmodelsize = os.path.getsize(ttsmodelpath)
if filepath and os.path.exists(filepath):
try:
fsize = os.path.getsize(filepath)
if fsize>10000000: #dont bother with models < 10mb as they are probably bad
ggufmeta = read_gguf_metadata(filepath)
modelfile_extracted_meta = [ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize] #extract done. note that meta may be null
modelfile_extracted_meta = [ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize,ttsmodelsize] #extract done. note that meta may be null
except Exception:
modelfile_extracted_meta = None
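For reference, the layout of modelfile_extracted_meta after this change, as assembled above:

    # [0] gguf metadata (may be None)   [1] main model file size
    # [2] sd model size                 [3] whisper model size
    # [4] mmproj size                   [5] draft model size
    # [6] tts model size (new in this commit)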
@@ -699,6 +731,8 @@ def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how man
mem -= 350*1024*1024
if modelfile_extracted_meta[5] > 1024*1024*10: #draft model tax
mem -= (modelfile_extracted_meta[5] * 1.5)
if modelfile_extracted_meta[6] > 1024*1024*10: #tts model tax
mem -= max(600*1024*1024, modelfile_extracted_meta[6] * 3)
mem = 0 if mem < 0 else mem
csmul = 1.0
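Worked through, the new TTS model tax reserves whichever is larger: a 600 MiB floor or three times the TTS model's file size. Illustrative numbers:

    floor = 600 * 1024 * 1024
    for ttsmodelsize in (150 * 1024**2, 1024 * 1024**2):      # a 150 MiB and a 1 GiB model
        print(max(floor, ttsmodelsize * 3) // 1024**2, "MiB") # -> 600 MiB, 3072 MiB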
@@ -730,6 +764,8 @@ def fetch_gpu_properties(testCL,testCU,testVK):
FetchedCUdevices = []
FetchedCUdeviceMem = []
FetchedCUfreeMem = []
faileddetectvram = False
AMDgpu = None
try: # Get NVIDIA GPU names
output = subprocess.run(['nvidia-smi','--query-gpu=name,memory.total,memory.free','--format=csv,noheader'], capture_output=True, text=True, check=True, encoding='utf-8').stdout
@@ -737,6 +773,10 @@ def fetch_gpu_properties(testCL,testCU,testVK):
FetchedCUdeviceMem = [line.split(",")[1].strip().split(" ")[0].strip() for line in output.splitlines()]
FetchedCUfreeMem = [line.split(",")[2].strip().split(" ")[0].strip() for line in output.splitlines()]
except Exception:
FetchedCUdevices = []
FetchedCUdeviceMem = []
FetchedCUfreeMem = []
faileddetectvram = True
pass
if len(FetchedCUdevices)==0:
try: # Get AMD ROCm GPU names
@@ -756,18 +796,30 @@ def fetch_gpu_properties(testCL,testCU,testVK):
if getamdvram:
FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
except Exception:
FetchedCUdevices = []
FetchedCUdeviceMem = []
FetchedCUfreeMem = []
faileddetectvram = True
pass
lowestcumem = 0
lowestfreecumem = 0
for idx in range(0,4):
if(len(FetchedCUdevices)>idx):
CUDevicesNames[idx] = FetchedCUdevices[idx]
if len(FetchedCUdeviceMem)>idx:
dmem = int(FetchedCUdeviceMem[idx]) if AMDgpu else (int(FetchedCUdeviceMem[idx])*1024*1024)
lowestcumem = dmem if lowestcumem==0 else (dmem if dmem<lowestcumem else lowestcumem)
if len(FetchedCUfreeMem)>idx:
dmem = (int(FetchedCUfreeMem[idx])*1024*1024)
lowestfreecumem = dmem if lowestfreecumem==0 else (dmem if dmem<lowestfreecumem else lowestfreecumem)
try:
for idx in range(0,4):
if(len(FetchedCUdevices)>idx):
CUDevicesNames[idx] = FetchedCUdevices[idx]
if len(FetchedCUdeviceMem)>idx:
dmem = int(FetchedCUdeviceMem[idx]) if AMDgpu else (int(FetchedCUdeviceMem[idx])*1024*1024)
lowestcumem = dmem if lowestcumem==0 else (dmem if dmem<lowestcumem else lowestcumem)
if len(FetchedCUfreeMem)>idx:
dmem = (int(FetchedCUfreeMem[idx])*1024*1024)
lowestfreecumem = dmem if lowestfreecumem==0 else (dmem if dmem<lowestfreecumem else lowestfreecumem)
except Exception:
lowestcumem = 0
lowestfreecumem = 0
faileddetectvram = True
if faileddetectvram:
print("Unable to detect VRAM, please set layers manually.")
MaxMemory[0] = max(lowestcumem,MaxMemory[0])
MaxFreeMemory[0] = max(lowestfreecumem,MaxFreeMemory[0])
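The hardening above wraps the parsing of nvidia-smi's CSV output; for context, a standalone sketch of that query and the MiB-to-bytes conversion it feeds (requires an NVIDIA driver; the sample line is illustrative):

    import subprocess

    # each line looks like: "NVIDIA GeForce RTX 3090, 24576 MiB, 22000 MiB"
    out = subprocess.run(
        ["nvidia-smi", "--query-gpu=name,memory.total,memory.free", "--format=csv,noheader"],
        capture_output=True, text=True, check=True, encoding="utf-8").stdout
    for line in out.splitlines():
        name, total, free = [field.strip() for field in line.split(",")]
        total_bytes = int(total.split(" ")[0]) * 1024 * 1024  # MiB -> bytes, as above
        free_bytes = int(free.split(" ")[0]) * 1024 * 1024
        print(name, total_bytes, free_bytes)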
@@ -1264,6 +1316,34 @@ def whisper_generate(genparams):
outstr = ret.data.decode("UTF-8","ignore")
return outstr
def tts_load_model(ttc_model_filename,cts_model_filename):
global args
inputs = tts_load_model_inputs()
inputs.debugmode = args.debugmode
inputs.executable_path = (getdirpath()+"/").encode("UTF-8")
inputs.ttc_model_filename = ttc_model_filename.encode("UTF-8")
inputs.cts_model_filename = cts_model_filename.encode("UTF-8")
inputs.gpulayers = (999 if args.ttsgpu else 0)
inputs = set_backend_props(inputs)
ret = handle.tts_load_model(inputs)
return ret
def tts_generate(genparams):
global args
is_quiet = True if (args.quiet or args.debugmode == -1) else False
prompt = genparams.get("input", "")
prompt = prompt.strip()
inputs = tts_generation_inputs()
inputs.prompt = prompt.encode("UTF-8")
inputs.speaker_seed = 0
inputs.audio_seed = 0
inputs.quiet = is_quiet
ret = handle.tts_generate(inputs)
outstr = ""
if ret.status==1:
outstr = ret.data.decode("UTF-8","ignore")
return outstr
def tokenize_ids(countprompt,tcaddspecial):
rawcountdata = handle.token_count(countprompt.encode("UTF-8"),tcaddspecial)
countlimit = rawcountdata.count if (rawcountdata.count>=0 and rawcountdata.count<50000) else 0
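A minimal usage sketch for tts_generate above, assuming the backend hands back the WAV payload base64-encoded (which is how the HTTP handler later in this diff treats it):

    import base64

    b64 = tts_generate({"input": "Hello there, this is a test."})
    if b64:
        with open("out.wav", "wb") as f:
            f.write(base64.b64decode(b64))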
@@ -1738,10 +1818,11 @@ def LaunchWebbrowser(target_url, failedmsg):
try:
import webbrowser as wb
if wb.open(target_url, autoraise=True):
return
return
raise RuntimeError("Cannot open default browser")
except Exception:
except Exception as e:
try:
print(f"Browser failed to launch: {e}, attempting to use xdg-open...")
import webbrowser as wb
if wb.get('xdg-open').open(target_url, autoraise=True):
return
@@ -2102,7 +2183,7 @@ Enter Prompt:<br>
def do_GET(self):
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
global has_multiplayer, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, maxctx, maxhordelen, friendlymodelname, lastgeneratedcomfyimg, KcppVersion, totalgens, preloaded_story, exitcounter, currentusergenkey, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
global has_multiplayer, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, maxctx, maxhordelen, friendlymodelname, lastgeneratedcomfyimg, KcppVersion, totalgens, preloaded_story, exitcounter, currentusergenkey, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
self.path = self.path.rstrip('/')
response_body = None
content_type = 'application/json'
@@ -2160,7 +2241,8 @@ Enter Prompt:<br>
has_password = (password!="")
has_whisper = (fullwhispermodelpath!="")
has_search = True if args.websearch else False
response_body = (json.dumps({"result":"KoboldCpp","version":KcppVersion, "protected":has_password ,"txt2img":has_txt2img,"vision":has_vision,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search}).encode())
has_tts = (ttsmodelpath!="")
response_body = (json.dumps({"result":"KoboldCpp","version":KcppVersion, "protected":has_password ,"txt2img":has_txt2img,"vision":has_vision,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts}).encode())
elif self.path.endswith(('/api/extra/perf')):
global last_req_time, start_time
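A quick probe of the new "tts" capability flag; the endpoint path and default port here are assumptions for illustration:

    import json, urllib.request

    with urllib.request.urlopen("http://localhost:5001/api/extra/version") as resp:
        caps = json.load(resp)
    print(caps.get("tts"))  # True once a TTS model pair is loaded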
@@ -2521,7 +2603,7 @@ Enter Prompt:<br>
reqblocking = False
muint = int(args.multiuser)
if muint<=0 and ((args.whispermodel and args.whispermodel!="") or (args.sdmodel and args.sdmodel!="")):
if muint<=0 and ((args.whispermodel and args.whispermodel!="") or (args.sdmodel and args.sdmodel!="") or (args.ttsmodel and args.ttsmodel!="")):
muint = 2 # this prevents errors when using voice/img together with text
multiuserlimit = ((muint-1) if muint > 1 else 6)
#backwards compatibility for up to 7 concurrent requests, use default limit of 7 if multiuser set to 1
@@ -2546,6 +2628,7 @@ Enter Prompt:<br>
is_imggen = False
is_comfyui_imggen = False
is_transcribe = False
is_tts = False
if self.path.endswith('/request'):
api_format = 1
@@ -2588,11 +2671,14 @@ Enter Prompt:<br>
if self.path.endswith('/api/extra/transcribe') or self.path.endswith('/v1/audio/transcriptions'):
is_transcribe = True
if is_imggen or is_transcribe or api_format > 0:
if self.path.endswith('/api/extra/tts') or self.path.endswith('/v1/audio/speech'):
is_tts = True
if is_imggen or is_transcribe or is_tts or api_format > 0:
global last_req_time
last_req_time = time.time()
if not is_imggen and not is_transcribe and api_format!=5:
if not is_imggen and not is_transcribe and not is_tts and api_format!=5:
if not self.secure_endpoint():
return
@@ -2680,6 +2766,21 @@ Enter Prompt:<br>
print("Transcribe: The response could not be sent, maybe connection was terminated?")
time.sleep(0.2) #short delay
return
elif is_tts:
try:
gen = tts_generate(genparams)
wav_data = b''
if gen:
wav_data = base64.b64decode(gen) # Decode the Base64 string into binary data
self.send_response(200)
self.send_header('content-length', str(len(wav_data))) # Set content length
self.end_headers(content_type='audio/wav')
self.wfile.write(wav_data) # Write the binary WAV data to the response
except Exception as ex:
utfprint(ex,0)
print("TTS: The response could not be sent, maybe connection was terminated?")
time.sleep(0.2) #short delay
return
finally:
time.sleep(0.05)
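A minimal client for the new route, matching the genparams.get("input", "") key read by tts_generate (host and port are assumptions):

    import json, urllib.request

    req = urllib.request.Request(
        "http://localhost:5001/api/extra/tts",
        data=json.dumps({"input": "Hello there."}).encode("utf-8"),
        headers={"Content-Type": "application/json"})
    with urllib.request.urlopen(req) as resp:
        with open("speech.wav", "wb") as f:
            f.write(resp.read())  # raw WAV bytes, per the handler above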
@@ -2806,7 +2907,7 @@ def show_gui():
if dlfile:
args.model_param = dlfile
load_config_cli(args.model_param)
if not args.model_param and not args.sdmodel and not args.whispermodel and not args.nomodel:
if not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel:
global exitcounter
exitcounter = 999
exit_with_error(2,"No ggml model or kcpps file was selected. Exiting.")
@@ -3008,6 +3109,9 @@ def show_gui():
sd_quant_var = ctk.IntVar(value=0)
whisper_model_var = ctk.StringVar()
tts_model_var = ctk.StringVar()
wavtokenizer_var = ctk.StringVar()
ttsgpu_var = ctk.IntVar(value=0)
def tabbuttonaction(name):
for t in tabcontent:
@@ -3158,7 +3262,8 @@ def show_gui():
whisperfilepath = whisper_model_var.get()
mmprojfilepath = mmproj_var.get()
draftmodelpath = draftmodel_var.get()
extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath)
ttsmodelpath = tts_model_var.get() if ttsgpu_var.get()==1 else ""
extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath,ttsmodelpath)
changed_gpulayers_estimate()
pass
@@ -3575,8 +3680,14 @@ def show_gui():
# audio tab
audio_tab = tabcontent["Audio"]
makefileentry(audio_tab, "Whisper Model (Speech-To-Text):", "Select Whisper .bin Model File", whisper_model_var, 1, width=280, filetypes=[("*.bin","*.bin")], tooltiptxt="Select a Whisper .bin model file on disk to be loaded.")
makefileentry(audio_tab, "Whisper Model (Speech-To-Text):", "Select Whisper .bin Model File", whisper_model_var, 1, width=280, filetypes=[("*.bin","*.bin")], tooltiptxt="Select a Whisper .bin model file on disk to be loaded for Voice Recognition.")
whisper_model_var.trace("w", gui_changed_modelfile)
makefileentry(audio_tab, "OuteTTS Model (Text-To-Speech):", "Select OuteTTS GGUF Model File", tts_model_var, 3, width=280, filetypes=[("*.gguf","*.gguf")], tooltiptxt="Select a OuteTTS GGUF model file on disk to be loaded for Narration.")
tts_model_var.trace("w", gui_changed_modelfile)
makefileentry(audio_tab, "WavTokenizer Model (Text-To-Speech):", "Select WavTokenizer GGUF Model File", wavtokenizer_var, 5, width=280, filetypes=[("*.gguf","*.gguf")], tooltiptxt="Select a WavTokenizer GGUF model file on disk to be loaded for Narration.")
wavtokenizer_var.trace("w", gui_changed_modelfile)
makecheckbox(audio_tab, "TTS Use GPU", ttsgpu_var, 7, 0,tooltiptxt="Uses the GPU for TTS.")
ttsgpu_var.trace("w", gui_changed_modelfile)
def kcpp_export_template():
nonlocal kcpp_exporting_template
@@ -3625,7 +3736,7 @@ def show_gui():
# launch
def guilaunch():
if model_var.get() == "" and sd_model_var.get() == "" and whisper_model_var.get() == "" and nomodel.get()!=1:
if model_var.get() == "" and sd_model_var.get() == "" and whisper_model_var.get() == "" and tts_model_var.get() == "" and nomodel.get()!=1:
tmp = askopenfilename(title="Select ggml model .bin or .gguf file")
model_var.set(tmp)
nonlocal nextstate
@@ -3792,6 +3903,11 @@ def show_gui():
if whisper_model_var.get() != "":
args.whispermodel = whisper_model_var.get()
if tts_model_var.get() != "" and wavtokenizer_var.get() != "":
args.ttsmodel = tts_model_var.get()
args.ttswavtokenizer = wavtokenizer_var.get()
args.ttsgpu = (ttsgpu_var.get()==1)
def import_vars(dict):
global importvars_in_progress
importvars_in_progress = True
@@ -3952,6 +4068,10 @@ def show_gui():
whisper_model_var.set(dict["whispermodel"] if ("whispermodel" in dict and dict["whispermodel"]) else "")
tts_model_var.set(dict["ttsmodel"] if ("ttsmodel" in dict and dict["ttsmodel"]) else "")
wavtokenizer_var.set(dict["ttswavtokenizer"] if ("ttswavtokenizer" in dict and dict["ttswavtokenizer"]) else "")
ttsgpu_var.set(dict["ttsgpu"] if ("ttsgpu" in dict) else 0)
importvars_in_progress = False
gui_changed_modelfile()
if "istemplate" in dict and dict["istemplate"]:
@@ -4022,7 +4142,7 @@ def show_gui():
kcpp_exporting_template = False
export_vars()
if not args.model_param and not args.sdmodel and not args.whispermodel and not args.nomodel:
if not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel:
exitcounter = 999
print("")
time.sleep(0.5)
@@ -4566,7 +4686,7 @@ def analyze_gguf_model_wrapper(filename=""):
def main(launch_args,start_server=True):
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
args = launch_args
if (args.version) and len(sys.argv) <= 2:
@@ -4629,7 +4749,7 @@ def main(launch_args,start_server=True):
if not args.model_param:
args.model_param = args.model
if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.nomodel):
if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
#give them a chance to pick a file
print("For command line arguments, please refer to --help")
print("***")
@@ -4753,6 +4873,14 @@ def main(launch_args,start_server=True):
dlfile = download_model_from_url(args.draftmodel,[".gguf"])
if dlfile:
args.draftmodel = dlfile
if args.ttsmodel and args.ttsmodel!="":
dlfile = download_model_from_url(args.ttsmodel,[".gguf"])
if dlfile:
args.ttsmodel = dlfile
if args.ttswavtokenizer and args.ttswavtokenizer!="":
dlfile = download_model_from_url(args.ttswavtokenizer,[".gguf"])
if dlfile:
args.ttswavtokenizer = dlfile
# sanitize and replace the default vanity name. remember me....
if args.model_param and args.model_param!="":
@@ -4830,7 +4958,7 @@ def main(launch_args,start_server=True):
pass
if args.gpulayers==-1:
if MaxMemory[0] > 0 and (not args.usecpu) and ((args.usecublas is not None) or (args.usevulkan is not None) or (args.useclblast is not None) or sys.platform=="darwin"):
extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel)
extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel,args.ttsmodel if args.ttsgpu else "")
layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize)
print(f"Auto Recommended GPU Layers: {layeramt}")
args.gpulayers = layeramt
@@ -4999,6 +5127,27 @@ def main(launch_args,start_server=True):
exitcounter = 999
exit_with_error(3,"Could not load whisper model: " + whispermodel)
#handle tts model
if args.ttsmodel and args.ttsmodel!="" and args.ttswavtokenizer and args.ttswavtokenizer!="":
if not os.path.exists(args.ttsmodel) or not os.path.exists(args.ttswavtokenizer):
if args.ignoremissing:
print("Ignoring missing TTS model files!")
args.ttsmodel = None
args.ttswavtokenizer = None
else:
exitcounter = 999
exit_with_error(2,f"Cannot find tts model files: {args.ttsmodel} or {args.ttswavtokenizer}")
else:
ttsmodelpath = args.ttsmodel
ttsmodelpath = os.path.abspath(ttsmodelpath)
wavtokpath = args.ttswavtokenizer
wavtokpath = os.path.abspath(wavtokpath)
loadok = tts_load_model(ttsmodelpath,wavtokpath)
print("Load TTS Model OK: " + str(loadok))
if not loadok:
exitcounter = 999
exit_with_error(3,"Could not load TTS model!")
#load embedded lite
try:
@@ -5296,7 +5445,12 @@ if __name__ == '__main__':
sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')
whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper bin model to enable Speech-To-Text transcription.", default="")
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="")
ttsparsergroup = parser.add_argument_group('TTS Narration Commands')
ttsparsergroup.add_argument("--ttsmodel", metavar=('[filename]'), help="Specify the OuteTTS Text-To-Speech GGUF model.", default="")
ttsparsergroup.add_argument("--ttswavtokenizer", metavar=('[filename]'), help="Specify the WavTokenizer GGUF model.", default="")
ttsparsergroup.add_argument("--ttsgpu", help="Use the GPU for TTS.", action='store_true')
deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!')
deprecatedgroup.add_argument("--hordeconfig", help=argparse.SUPPRESS, nargs='+')
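Putting the new flags together, a typical launch pairing both TTS files (filenames are hypothetical):

    python koboldcpp.py --model mymodel.gguf --ttsmodel outetts.gguf --ttswavtokenizer wavtok.gguf --ttsgpu

Both --ttsmodel and --ttswavtokenizer must be supplied for TTS to load, as the startup check earlier in this diff requires the pair.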