mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
split audio and vision detection separately
This commit is contained in:
parent
0938af7c83
commit
811463a704
5 changed files with 82 additions and 21 deletions
21
koboldcpp.py
21
koboldcpp.py
|
@ -105,6 +105,7 @@ runmode_untouched = True
|
|||
modelfile_extracted_meta = None
|
||||
importvars_in_progress = False
|
||||
has_multiplayer = False
|
||||
has_audio_support = False
|
||||
savedata_obj = None
|
||||
multiplayer_story_data_compressed = None #stores the full compressed story of the current multiplayer session
|
||||
multiplayer_turn_major = 1 # to keep track of when a client needs to sync their stories
|
||||
|
@ -537,6 +538,7 @@ def init_library():
|
|||
handle.new_token.argtypes = [ctypes.c_int]
|
||||
handle.get_stream_count.restype = ctypes.c_int
|
||||
handle.has_finished.restype = ctypes.c_bool
|
||||
handle.has_audio_support.restype = ctypes.c_bool
|
||||
handle.get_last_eval_time.restype = ctypes.c_float
|
||||
handle.get_last_process_time.restype = ctypes.c_float
|
||||
handle.get_last_token_count.restype = ctypes.c_int
|
||||
|
@ -889,7 +891,7 @@ def convert_json_to_gbnf(json_obj):
|
|||
return ""
|
||||
|
||||
def get_capabilities():
|
||||
global savedata_obj, has_multiplayer, KcppVersion, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath
|
||||
global savedata_obj, has_multiplayer, KcppVersion, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, has_audio_support
|
||||
has_llm = not (friendlymodelname=="inactive")
|
||||
has_txt2img = not (friendlysdmodelname=="inactive" or fullsdmodelpath=="")
|
||||
has_vision = (mmprojpath!="")
|
||||
|
@ -900,7 +902,7 @@ def get_capabilities():
|
|||
has_embeddings = (embeddingsmodelpath!="")
|
||||
has_guidance = True if args.enableguidance else False
|
||||
admin_type = (2 if args.admin and args.admindir and args.adminpassword else (1 if args.admin and args.admindir else 0))
|
||||
return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "embeddings":has_embeddings, "savedata":(savedata_obj is not None), "admin": admin_type, "guidance": has_guidance}
|
||||
return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision,"audio":has_audio_support,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "embeddings":has_embeddings, "savedata":(savedata_obj is not None), "admin": admin_type, "guidance": has_guidance}
|
||||
|
||||
def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo
|
||||
chunk_size = 1024*1024*12 # read first 12mb of file
|
||||
|
@ -2287,6 +2289,7 @@ def transform_genparams(genparams, api_format):
|
|||
tools_message_start = adapter_obj.get("tools_start", "")
|
||||
tools_message_end = adapter_obj.get("tools_end", "")
|
||||
images_added = []
|
||||
audio_added = []
|
||||
jsongrammar = r"""
|
||||
root ::= arr
|
||||
value ::= object | array | string | number | ("true" | "false" | "null") ws
|
||||
|
@ -2362,6 +2365,10 @@ ws ::= | " " | "\n" [ \t]{0,20}
|
|||
if 'image_url' in item and item['image_url'] and item['image_url']['url'] and item['image_url']['url'].startswith("data:image"):
|
||||
images_added.append(item['image_url']['url'].split(",", 1)[1])
|
||||
messages_string += "\n(Attached Image)\n"
|
||||
elif item['type']=="input_audio":
|
||||
if 'input_audio' in item and item['input_audio'] and item['input_audio']['data']:
|
||||
audio_added.append(item['input_audio']['data'])
|
||||
messages_string += "\n(Attached Audio)\n"
|
||||
# If last message, add any tools calls after message content and before message end token if any
|
||||
if message['role'] == "user" and message_index == len(messages_array):
|
||||
# tools handling: Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
|
||||
|
@ -2464,6 +2471,8 @@ ws ::= | " " | "\n" [ \t]{0,20}
|
|||
genparams["prompt"] = messages_string
|
||||
if len(images_added)>0:
|
||||
genparams["images"] = images_added
|
||||
if len(audio_added)>0:
|
||||
genparams["audio"] = audio_added
|
||||
if len(genparams.get('stop_sequence', []))==0: #only set stop seq if it wont overwrite existing
|
||||
genparams["stop_sequence"] = [user_message_start.strip(),assistant_message_start.strip()]
|
||||
else:
|
||||
|
@ -4947,7 +4956,7 @@ def show_gui():
|
|||
changed_gpu_choice_var()
|
||||
|
||||
# presets selector
|
||||
makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
|
||||
makelabel(quick_tab, "Backend:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
|
||||
|
||||
runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=190,variable=runopts_var, state="readonly")
|
||||
runoptbox.grid(row=1, column=1,padx=8, stick="nw")
|
||||
|
@ -4995,7 +5004,7 @@ def show_gui():
|
|||
hardware_tab = tabcontent["Hardware"]
|
||||
|
||||
# presets selector
|
||||
makelabel(hardware_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
|
||||
makelabel(hardware_tab, "Backend:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
|
||||
runoptbox = ctk.CTkComboBox(hardware_tab, values=runopts, width=180,variable=runopts_var, state="readonly")
|
||||
runoptbox.grid(row=1, column=1,padx=8, stick="nw")
|
||||
runoptbox.set(runopts[0]) # Set to first available option
|
||||
|
@ -6577,7 +6586,7 @@ def main(launch_args, default_args):
|
|||
|
||||
def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
||||
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory, using_gui_launcher
|
||||
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, friendlyembeddingsmodelname
|
||||
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, friendlyembeddingsmodelname, has_audio_support
|
||||
|
||||
start_server = True
|
||||
|
||||
|
@ -6934,7 +6943,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
print("WARNING: Selected Text Model does not seem to be a GGUF file! Are you sure you picked the right file?")
|
||||
loadok = load_model(modelname)
|
||||
print("Load Text Model OK: " + str(loadok))
|
||||
|
||||
has_audio_support = handle.has_audio_support() # multimodal audio support is only known at runtime
|
||||
if not loadok:
|
||||
exitcounter = 999
|
||||
exit_with_error(3,"Could not load text model: " + modelname)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue