Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
Fixed some GGUFv1 loading bugs, long overdue cleanup for compiling, integrated TTS

tts is functional (+6 squashed commits)

Squashed commit:
[22396311] wip tts
[3a883027] tts not yet working
[0dcfab0e] fix silly bug
[a378d9ef] some long overdue cleanup
[fc5a6fb5] Wip tts
[39f50497] wip TTS integration
parent 12cdcf0abe
commit b3de1598e7
17 changed files with 1175 additions and 271 deletions
koboldcpp.py (214 changes)
@@ -53,6 +53,7 @@ fullsdmodelpath = "" #if empty, it's not initialized
 mmprojpath = "" #if empty, it's not initialized
 password = "" #if empty, no auth key required
 fullwhispermodelpath = "" #if empty, it's not initialized
+ttsmodelpath = "" #if empty, not initialized
 maxctx = 4096
 maxhordectx = 4096
 maxhordelen = 400
@@ -281,6 +282,26 @@ class whisper_generation_outputs(ctypes.Structure):
     _fields_ = [("status", ctypes.c_int),
                 ("data", ctypes.c_char_p)]

+class tts_load_model_inputs(ctypes.Structure):
+    _fields_ = [("ttc_model_filename", ctypes.c_char_p),
+                ("cts_model_filename", ctypes.c_char_p),
+                ("executable_path", ctypes.c_char_p),
+                ("clblast_info", ctypes.c_int),
+                ("cublas_info", ctypes.c_int),
+                ("vulkan_info", ctypes.c_char_p),
+                ("gpulayers", ctypes.c_int),
+                ("debugmode", ctypes.c_int)]
+
+class tts_generation_inputs(ctypes.Structure):
+    _fields_ = [("prompt", ctypes.c_char_p),
+                ("speaker_seed", ctypes.c_int),
+                ("audio_seed", ctypes.c_int),
+                ("quiet", ctypes.c_bool)]
+
+class tts_generation_outputs(ctypes.Structure):
+    _fields_ = [("status", ctypes.c_int),
+                ("data", ctypes.c_char_p)]
+
 def getdirpath():
     return os.path.dirname(os.path.realpath(__file__))
 def getabspath():
@@ -440,6 +461,10 @@ def init_library():
     handle.whisper_load_model.restype = ctypes.c_bool
     handle.whisper_generate.argtypes = [whisper_generation_inputs]
     handle.whisper_generate.restype = whisper_generation_outputs
+    handle.tts_load_model.argtypes = [tts_load_model_inputs]
+    handle.tts_load_model.restype = ctypes.c_bool
+    handle.tts_generate.argtypes = [tts_generation_inputs]
+    handle.tts_generate.restype = tts_generation_outputs
     handle.last_logprobs.restype = last_logprobs_outputs
     handle.detokenize.argtypes = [token_count_outputs]
     handle.detokenize.restype = ctypes.c_char_p
@@ -577,9 +602,13 @@ def utfprint(str, importance = 2): #0 = only debugmode, 1 = except quiet, 2 = all
     maxlen = 32000
     if args.debugmode >= 1:
         maxlen = 64000
-    strlength = len(str)
-    if strlength > maxlen: #limit max output len
-        str = str[:maxlen] + f"... (+{strlength-maxlen} chars)"
+    try:
+        strlength = len(str)
+        if strlength > maxlen: #limit max output len
+            str = str[:maxlen] + f"... (+{strlength-maxlen} chars)"
+    except Exception:
+        pass
+
     try:
         print(str)
     except UnicodeEncodeError:
@@ -647,13 +676,14 @@ def read_gguf_metadata(file_path):
     except Exception:
         return None

-def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath):
+def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath,ttsmodelpath):
     global modelfile_extracted_meta
     modelfile_extracted_meta = None
     sdfsize = 0
     whisperfsize = 0
     mmprojsize = 0
     draftmodelsize = 0
+    ttsmodelsize = 0
     if sdfilepath and os.path.exists(sdfilepath):
         sdfsize = os.path.getsize(sdfilepath)
     if whisperfilepath and os.path.exists(whisperfilepath):
@@ -662,12 +692,14 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,
         mmprojsize = os.path.getsize(mmprojfilepath)
     if draftmodelpath and os.path.exists(draftmodelpath):
         draftmodelsize = os.path.getsize(draftmodelpath)
+    if ttsmodelpath and os.path.exists(ttsmodelpath):
+        ttsmodelsize = os.path.getsize(ttsmodelpath)
     if filepath and os.path.exists(filepath):
         try:
             fsize = os.path.getsize(filepath)
             if fsize>10000000: #dont bother with models < 10mb as they are probably bad
                 ggufmeta = read_gguf_metadata(filepath)
-                modelfile_extracted_meta = [ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize] #extract done. note that meta may be null
+                modelfile_extracted_meta = [ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize,ttsmodelsize] #extract done. note that meta may be null
         except Exception:
             modelfile_extracted_meta = None

@@ -699,6 +731,8 @@ def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how many layers to use
         mem -= 350*1024*1024
     if modelfile_extracted_meta[5] > 1024*1024*10: #draft model tax
         mem -= (modelfile_extracted_meta[5] * 1.5)
+    if modelfile_extracted_meta[6] > 1024*1024*10: #tts model tax
+        mem -= max(600*1024*1024, modelfile_extracted_meta[6] * 3)
     mem = 0 if mem < 0 else mem

     csmul = 1.0
@@ -730,6 +764,8 @@ def fetch_gpu_properties(testCL,testCU,testVK):
     FetchedCUdevices = []
     FetchedCUdeviceMem = []
+    FetchedCUfreeMem = []
+    faileddetectvram = False

     AMDgpu = None
     try: # Get NVIDIA GPU names
         output = subprocess.run(['nvidia-smi','--query-gpu=name,memory.total,memory.free','--format=csv,noheader'], capture_output=True, text=True, check=True, encoding='utf-8').stdout
@@ -737,6 +773,10 @@ def fetch_gpu_properties(testCL,testCU,testVK):
         FetchedCUdeviceMem = [line.split(",")[1].strip().split(" ")[0].strip() for line in output.splitlines()]
+        FetchedCUfreeMem = [line.split(",")[2].strip().split(" ")[0].strip() for line in output.splitlines()]
     except Exception:
         FetchedCUdevices = []
         FetchedCUdeviceMem = []
+        FetchedCUfreeMem = []
+        faileddetectvram = True
+        pass
     if len(FetchedCUdevices)==0:
         try: # Get AMD ROCm GPU names
@@ -756,18 +796,30 @@ def fetch_gpu_properties(testCL,testCU,testVK):
             if getamdvram:
                 FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
         except Exception:
+            FetchedCUdevices = []
+            FetchedCUdeviceMem = []
+            FetchedCUfreeMem = []
+            faileddetectvram = True
             pass
     lowestcumem = 0
     lowestfreecumem = 0
-    for idx in range(0,4):
-        if(len(FetchedCUdevices)>idx):
-            CUDevicesNames[idx] = FetchedCUdevices[idx]
-            if len(FetchedCUdeviceMem)>idx:
-                dmem = int(FetchedCUdeviceMem[idx]) if AMDgpu else (int(FetchedCUdeviceMem[idx])*1024*1024)
-                lowestcumem = dmem if lowestcumem==0 else (dmem if dmem<lowestcumem else lowestcumem)
-            if len(FetchedCUfreeMem)>idx:
-                dmem = (int(FetchedCUfreeMem[idx])*1024*1024)
-                lowestfreecumem = dmem if lowestfreecumem==0 else (dmem if dmem<lowestfreecumem else lowestfreecumem)
+    try:
+        for idx in range(0,4):
+            if(len(FetchedCUdevices)>idx):
+                CUDevicesNames[idx] = FetchedCUdevices[idx]
+                if len(FetchedCUdeviceMem)>idx:
+                    dmem = int(FetchedCUdeviceMem[idx]) if AMDgpu else (int(FetchedCUdeviceMem[idx])*1024*1024)
+                    lowestcumem = dmem if lowestcumem==0 else (dmem if dmem<lowestcumem else lowestcumem)
+                if len(FetchedCUfreeMem)>idx:
+                    dmem = (int(FetchedCUfreeMem[idx])*1024*1024)
+                    lowestfreecumem = dmem if lowestfreecumem==0 else (dmem if dmem<lowestfreecumem else lowestfreecumem)
+    except Exception:
+        lowestcumem = 0
+        lowestfreecumem = 0
+        faileddetectvram = True
+
+    if faileddetectvram:
+        print("Unable to detect VRAM, please set layers manually.")

     MaxMemory[0] = max(lowestcumem,MaxMemory[0])
     MaxFreeMemory[0] = max(lowestfreecumem,MaxFreeMemory[0])
@@ -1264,6 +1316,34 @@ def whisper_generate(genparams):
         outstr = ret.data.decode("UTF-8","ignore")
     return outstr

+def tts_load_model(ttc_model_filename,cts_model_filename):
+    global args
+    inputs = tts_load_model_inputs()
+    inputs.debugmode = args.debugmode
+    inputs.executable_path = (getdirpath()+"/").encode("UTF-8")
+    inputs.ttc_model_filename = ttc_model_filename.encode("UTF-8")
+    inputs.cts_model_filename = cts_model_filename.encode("UTF-8")
+    inputs.gpulayers = (999 if args.ttsgpu else 0)
+    inputs = set_backend_props(inputs)
+    ret = handle.tts_load_model(inputs)
+    return ret
+
+def tts_generate(genparams):
+    global args
+    is_quiet = True if (args.quiet or args.debugmode == -1) else False
+    prompt = genparams.get("input", "")
+    prompt = prompt.strip()
+    inputs = tts_generation_inputs()
+    inputs.prompt = prompt.encode("UTF-8")
+    inputs.speaker_seed = 0
+    inputs.audio_seed = 0
+    inputs.quiet = is_quiet
+    ret = handle.tts_generate(inputs)
+    outstr = ""
+    if ret.status==1:
+        outstr = ret.data.decode("UTF-8","ignore")
+    return outstr
+
 def tokenize_ids(countprompt,tcaddspecial):
     rawcountdata = handle.token_count(countprompt.encode("UTF-8"),tcaddspecial)
     countlimit = rawcountdata.count if (rawcountdata.count>=0 and rawcountdata.count<50000) else 0
@@ -1738,10 +1818,11 @@ def LaunchWebbrowser(target_url, failedmsg):
     try:
         import webbrowser as wb
         if wb.open(target_url, autoraise=True):
             return
         raise RuntimeError("Cannot open default browser")
-    except Exception:
+    except Exception as e:
         try:
+            print(f"Browser failed to launch: {e}, attempting to use xdg-open...")
             import webbrowser as wb
             if wb.get('xdg-open').open(target_url, autoraise=True):
                 return
@@ -2102,7 +2183,7 @@ Enter Prompt:<br>

     def do_GET(self):
         global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
-        global has_multiplayer, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, maxctx, maxhordelen, friendlymodelname, lastgeneratedcomfyimg, KcppVersion, totalgens, preloaded_story, exitcounter, currentusergenkey, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
+        global has_multiplayer, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, maxctx, maxhordelen, friendlymodelname, lastgeneratedcomfyimg, KcppVersion, totalgens, preloaded_story, exitcounter, currentusergenkey, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
         self.path = self.path.rstrip('/')
         response_body = None
         content_type = 'application/json'
@@ -2160,7 +2241,8 @@ Enter Prompt:<br>
             has_password = (password!="")
             has_whisper = (fullwhispermodelpath!="")
             has_search = True if args.websearch else False
-            response_body = (json.dumps({"result":"KoboldCpp","version":KcppVersion, "protected":has_password ,"txt2img":has_txt2img,"vision":has_vision,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search}).encode())
+            has_tts = (ttsmodelpath!="")
+            response_body = (json.dumps({"result":"KoboldCpp","version":KcppVersion, "protected":has_password ,"txt2img":has_txt2img,"vision":has_vision,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts}).encode())

         elif self.path.endswith(('/api/extra/perf')):
             global last_req_time, start_time
@@ -2521,7 +2603,7 @@ Enter Prompt:<br>

         reqblocking = False
         muint = int(args.multiuser)
-        if muint<=0 and ((args.whispermodel and args.whispermodel!="") or (args.sdmodel and args.sdmodel!="")):
+        if muint<=0 and ((args.whispermodel and args.whispermodel!="") or (args.sdmodel and args.sdmodel!="") or (args.ttsmodel and args.ttsmodel!="")):
             muint = 2 # this prevents errors when using voice/img together with text
         multiuserlimit = ((muint-1) if muint > 1 else 6)
         #backwards compatibility for up to 7 concurrent requests, use default limit of 7 if multiuser set to 1
@@ -2546,6 +2628,7 @@ Enter Prompt:<br>
         is_imggen = False
         is_comfyui_imggen = False
         is_transcribe = False
+        is_tts = False

         if self.path.endswith('/request'):
             api_format = 1
@@ -2588,11 +2671,14 @@ Enter Prompt:<br>
         if self.path.endswith('/api/extra/transcribe') or self.path.endswith('/v1/audio/transcriptions'):
             is_transcribe = True

-        if is_imggen or is_transcribe or api_format > 0:
+        if self.path.endswith('/api/extra/tts') or self.path.endswith('/v1/audio/speech'):
+            is_tts = True
+
+        if is_imggen or is_transcribe or is_tts or api_format > 0:
             global last_req_time
             last_req_time = time.time()

-            if not is_imggen and not is_transcribe and api_format!=5:
+            if not is_imggen and not is_transcribe and not is_tts and api_format!=5:
                 if not self.secure_endpoint():
                     return

@@ -2680,6 +2766,21 @@ Enter Prompt:<br>
                 print("Transcribe: The response could not be sent, maybe connection was terminated?")
                 time.sleep(0.2) #short delay
             return
+        elif is_tts:
+            try:
+                gen = tts_generate(genparams)
+                wav_data = b''
+                if gen:
+                    wav_data = base64.b64decode(gen) # Decode the Base64 string into binary data
+                self.send_response(200)
+                self.send_header('content-length', str(len(wav_data))) # Set content length
+                self.end_headers(content_type='audio/wav')
+                self.wfile.write(wav_data) # Write the binary WAV data to the response
+            except Exception as ex:
+                utfprint(ex,0)
+                print("TTS: The response could not be sent, maybe connection was terminated?")
+                time.sleep(0.2) #short delay
+            return

         finally:
             time.sleep(0.05)
@@ -2806,7 +2907,7 @@ def show_gui():
         if dlfile:
             args.model_param = dlfile
         load_config_cli(args.model_param)
-    if not args.model_param and not args.sdmodel and not args.whispermodel and not args.nomodel:
+    if not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel:
         global exitcounter
         exitcounter = 999
         exit_with_error(2,"No ggml model or kcpps file was selected. Exiting.")
@@ -3008,6 +3109,9 @@ def show_gui():
     sd_quant_var = ctk.IntVar(value=0)

     whisper_model_var = ctk.StringVar()
+    tts_model_var = ctk.StringVar()
+    wavtokenizer_var = ctk.StringVar()
+    ttsgpu_var = ctk.IntVar(value=0)

     def tabbuttonaction(name):
         for t in tabcontent:
@@ -3158,7 +3262,8 @@ def show_gui():
         whisperfilepath = whisper_model_var.get()
         mmprojfilepath = mmproj_var.get()
         draftmodelpath = draftmodel_var.get()
-        extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath)
+        ttsmodelpath = tts_model_var.get() if ttsgpu_var.get()==1 else ""
+        extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath,ttsmodelpath)
         changed_gpulayers_estimate()
         pass

@@ -3575,8 +3680,14 @@ def show_gui():

     # audio tab
     audio_tab = tabcontent["Audio"]
-    makefileentry(audio_tab, "Whisper Model (Speech-To-Text):", "Select Whisper .bin Model File", whisper_model_var, 1, width=280, filetypes=[("*.bin","*.bin")], tooltiptxt="Select a Whisper .bin model file on disk to be loaded.")
+    makefileentry(audio_tab, "Whisper Model (Speech-To-Text):", "Select Whisper .bin Model File", whisper_model_var, 1, width=280, filetypes=[("*.bin","*.bin")], tooltiptxt="Select a Whisper .bin model file on disk to be loaded for Voice Recognition.")
     whisper_model_var.trace("w", gui_changed_modelfile)
+    makefileentry(audio_tab, "OuteTTS Model (Text-To-Speech):", "Select OuteTTS GGUF Model File", tts_model_var, 3, width=280, filetypes=[("*.gguf","*.gguf")], tooltiptxt="Select a OuteTTS GGUF model file on disk to be loaded for Narration.")
+    tts_model_var.trace("w", gui_changed_modelfile)
+    makefileentry(audio_tab, "WavTokenizer Model (Text-To-Speech):", "Select WavTokenizer GGUF Model File", wavtokenizer_var, 5, width=280, filetypes=[("*.gguf","*.gguf")], tooltiptxt="Select a WavTokenizer GGUF model file on disk to be loaded for Narration.")
+    wavtokenizer_var.trace("w", gui_changed_modelfile)
+    makecheckbox(audio_tab, "TTS Use GPU", ttsgpu_var, 7, 0,tooltiptxt="Uses the GPU for TTS.")
+    ttsgpu_var.trace("w", gui_changed_modelfile)

     def kcpp_export_template():
         nonlocal kcpp_exporting_template
@@ -3625,7 +3736,7 @@ def show_gui():

     # launch
     def guilaunch():
-        if model_var.get() == "" and sd_model_var.get() == "" and whisper_model_var.get() == "" and nomodel.get()!=1:
+        if model_var.get() == "" and sd_model_var.get() == "" and whisper_model_var.get() == "" and tts_model_var.get() == "" and nomodel.get()!=1:
             tmp = askopenfilename(title="Select ggml model .bin or .gguf file")
             model_var.set(tmp)
         nonlocal nextstate
@@ -3792,6 +3903,11 @@ def show_gui():
         if whisper_model_var.get() != "":
             args.whispermodel = whisper_model_var.get()

+        if tts_model_var.get() != "" and wavtokenizer_var.get() != "":
+            args.ttsmodel = tts_model_var.get()
+            args.ttswavtokenizer = wavtokenizer_var.get()
+            args.ttsgpu = (ttsgpu_var.get()==1)
+
     def import_vars(dict):
         global importvars_in_progress
         importvars_in_progress = True
@@ -3952,6 +4068,10 @@ def show_gui():

         whisper_model_var.set(dict["whispermodel"] if ("whispermodel" in dict and dict["whispermodel"]) else "")

+        tts_model_var.set(dict["ttsmodel"] if ("ttsmodel" in dict and dict["ttsmodel"]) else "")
+        wavtokenizer_var.set(dict["ttswavtokenizer"] if ("ttswavtokenizer" in dict and dict["ttswavtokenizer"]) else "")
+        ttsgpu_var.set(dict["ttsgpu"] if ("ttsgpu" in dict) else 0)
+
         importvars_in_progress = False
         gui_changed_modelfile()
         if "istemplate" in dict and dict["istemplate"]:
@@ -4022,7 +4142,7 @@ def show_gui():
         kcpp_exporting_template = False
         export_vars()

-        if not args.model_param and not args.sdmodel and not args.whispermodel and not args.nomodel:
+        if not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel:
            exitcounter = 999
            print("")
            time.sleep(0.5)
@@ -4566,7 +4686,7 @@ def analyze_gguf_model_wrapper(filename=""):

 def main(launch_args,start_server=True):
     global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
-    global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
+    global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath

     args = launch_args
     if (args.version) and len(sys.argv) <= 2:
@@ -4629,7 +4749,7 @@ def main(launch_args,start_server=True):
     if not args.model_param:
         args.model_param = args.model

-    if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.nomodel):
+    if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
         #give them a chance to pick a file
         print("For command line arguments, please refer to --help")
         print("***")
@@ -4753,6 +4873,14 @@ def main(launch_args,start_server=True):
         dlfile = download_model_from_url(args.draftmodel,[".gguf"])
         if dlfile:
             args.draftmodel = dlfile
+    if args.ttsmodel and args.ttsmodel!="":
+        dlfile = download_model_from_url(args.ttsmodel,[".gguf"])
+        if dlfile:
+            args.ttsmodel = dlfile
+    if args.ttswavtokenizer and args.ttswavtokenizer!="":
+        dlfile = download_model_from_url(args.ttswavtokenizer,[".gguf"])
+        if dlfile:
+            args.ttswavtokenizer = dlfile

     # sanitize and replace the default vanity name. remember me....
     if args.model_param and args.model_param!="":
@@ -4830,7 +4958,7 @@ def main(launch_args,start_server=True):
             pass
     if args.gpulayers==-1:
         if MaxMemory[0] > 0 and (not args.usecpu) and ((args.usecublas is not None) or (args.usevulkan is not None) or (args.useclblast is not None) or sys.platform=="darwin"):
-            extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel)
+            extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel,args.ttsmodel if args.ttsgpu else "")
             layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize)
             print(f"Auto Recommended GPU Layers: {layeramt}")
             args.gpulayers = layeramt
@@ -4999,6 +5127,27 @@ def main(launch_args,start_server=True):
             exitcounter = 999
             exit_with_error(3,"Could not load whisper model: " + whispermodel)

+    #handle tts model
+    if args.ttsmodel and args.ttsmodel!="" and args.ttswavtokenizer and args.ttswavtokenizer!="":
+        if not os.path.exists(args.ttsmodel) or not os.path.exists(args.ttswavtokenizer):
+            if args.ignoremissing:
+                print("Ignoring missing TTS model files!")
+                args.ttsmodel = None
+                args.ttswavtokenizer = None
+            else:
+                exitcounter = 999
+                exit_with_error(2,f"Cannot find tts model files: {args.ttsmodel} or {args.ttswavtokenizer}")
+        else:
+            ttsmodelpath = args.ttsmodel
+            ttsmodelpath = os.path.abspath(ttsmodelpath)
+            wavtokpath = args.ttswavtokenizer
+            wavtokpath = os.path.abspath(wavtokpath)
+            loadok = tts_load_model(ttsmodelpath,wavtokpath)
+            print("Load TTS Model OK: " + str(loadok))
+            if not loadok:
+                exitcounter = 999
+                exit_with_error(3,"Could not load TTS model!")
+

     #load embedded lite
     try:
@@ -5296,7 +5445,12 @@ if __name__ == '__main__':
     sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')

     whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
-    whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper bin model to enable Speech-To-Text transcription.", default="")
+    whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="")
+
+    ttsparsergroup = parser.add_argument_group('TTS Narration Commands')
+    ttsparsergroup.add_argument("--ttsmodel", metavar=('[filename]'), help="Specify the OuteTTS Text-To-Speech GGUF model.", default="")
+    ttsparsergroup.add_argument("--ttswavtokenizer", metavar=('[filename]'), help="Specify the WavTokenizer GGUF model.", default="")
+    ttsparsergroup.add_argument("--ttsgpu", help="Use the GPU for TTS.", action='store_true')

     deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!')
     deprecatedgroup.add_argument("--hordeconfig", help=argparse.SUPPRESS, nargs='+')
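
For anyone trying out this change, here is a minimal client sketch for the new TTS endpoint, using only the Python standard library. It assumes a locally built KoboldCpp started with the flags added above (e.g. --ttsmodel and --ttswavtokenizer pointing at valid GGUF files); the port 5001 is assumed as KoboldCpp's usual default. The request body key "input" and the audio/wav response both come straight from the tts_generate handler in this diff.

import json
import urllib.request

# Build the JSON payload; per this diff, the handler reads genparams.get("input", "")
payload = json.dumps({"input": "Hello from KoboldCpp TTS."}).encode("utf-8")
req = urllib.request.Request(
    "http://localhost:5001/api/extra/tts",  # /v1/audio/speech is routed to the same handler
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    wav_bytes = resp.read()  # the server replies with binary audio/wav

with open("out.wav", "wb") as f:
    f.write(wav_bytes)
print(f"wrote {len(wav_bytes)} bytes of WAV audio")

Note that the native library returns the audio as a Base64 string; the server decodes it (base64.b64decode) before responding, so clients receive raw WAV bytes rather than Base64 text.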