mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-08 08:09:06 +00:00
clearer multimodal capability display
This commit is contained in:
parent
12a6088a65
commit
abf527a207
4 changed files with 28 additions and 13 deletions
|
@ -272,6 +272,10 @@ extern "C"
|
||||||
{
|
{
|
||||||
return audio_multimodal_supported;
|
return audio_multimodal_supported;
|
||||||
}
|
}
|
||||||
|
bool has_vision_support()
|
||||||
|
{
|
||||||
|
return vision_multimodal_supported;
|
||||||
|
}
|
||||||
float get_last_eval_time() {
|
float get_last_eval_time() {
|
||||||
return last_eval_time;
|
return last_eval_time;
|
||||||
}
|
}
|
||||||
|
|
1
expose.h
1
expose.h
|
@ -288,6 +288,7 @@ extern std::string draftmodel_filename;
|
||||||
extern std::vector<std::string> generated_tokens;
|
extern std::vector<std::string> generated_tokens;
|
||||||
extern bool generation_finished;
|
extern bool generation_finished;
|
||||||
extern bool audio_multimodal_supported;
|
extern bool audio_multimodal_supported;
|
||||||
|
extern bool vision_multimodal_supported;
|
||||||
extern float last_eval_time;
|
extern float last_eval_time;
|
||||||
extern float last_process_time;
|
extern float last_process_time;
|
||||||
extern int last_token_count;
|
extern int last_token_count;
|
||||||
|
|
|
@ -58,6 +58,7 @@ std::string draftmodel_filename = "";
|
||||||
int speculative_chunk_amt = 8; //do it in chunks of this many tokens
|
int speculative_chunk_amt = 8; //do it in chunks of this many tokens
|
||||||
bool generation_finished;
|
bool generation_finished;
|
||||||
bool audio_multimodal_supported = false;
|
bool audio_multimodal_supported = false;
|
||||||
|
bool vision_multimodal_supported = false;
|
||||||
float last_process_time = 0;
|
float last_process_time = 0;
|
||||||
float last_eval_time = 0;
|
float last_eval_time = 0;
|
||||||
int last_token_count = 0;
|
int last_token_count = 0;
|
||||||
|
@ -1985,6 +1986,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
draft_ctx = nullptr;
|
draft_ctx = nullptr;
|
||||||
guidance_ctx = nullptr;
|
guidance_ctx = nullptr;
|
||||||
audio_multimodal_supported = false;
|
audio_multimodal_supported = false;
|
||||||
|
vision_multimodal_supported = false;
|
||||||
|
|
||||||
auto clamped_max_context_length = inputs.max_context_length;
|
auto clamped_max_context_length = inputs.max_context_length;
|
||||||
|
|
||||||
|
@ -2464,6 +2466,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
return ModelLoadResult::FAIL;
|
return ModelLoadResult::FAIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(clp_ctx_v)
|
||||||
|
{
|
||||||
|
vision_multimodal_supported = true;
|
||||||
|
}
|
||||||
|
clp_img_data = clip_image_u8_init();
|
||||||
if(clp_ctx_a) //init audio
|
if(clp_ctx_a) //init audio
|
||||||
{
|
{
|
||||||
if (clip_has_whisper_encoder(clp_ctx_a)) {
|
if (clip_has_whisper_encoder(clp_ctx_a)) {
|
||||||
|
@ -2472,7 +2479,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
}
|
}
|
||||||
audio_multimodal_supported = true;
|
audio_multimodal_supported = true;
|
||||||
}
|
}
|
||||||
clp_img_data = clip_image_u8_init();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const llama_vocab * tmpvocab = llama_model_get_vocab(llamamodel);
|
const llama_vocab * tmpvocab = llama_model_get_vocab(llamamodel);
|
||||||
|
|
28
koboldcpp.py
28
koboldcpp.py
|
@ -76,7 +76,6 @@ friendlyembeddingsmodelname = "inactive"
|
||||||
lastgeneratedcomfyimg = b''
|
lastgeneratedcomfyimg = b''
|
||||||
lastuploadedcomfyimg = b''
|
lastuploadedcomfyimg = b''
|
||||||
fullsdmodelpath = "" #if empty, it's not initialized
|
fullsdmodelpath = "" #if empty, it's not initialized
|
||||||
mmprojpath = "" #if empty, it's not initialized
|
|
||||||
password = "" #if empty, no auth key required
|
password = "" #if empty, no auth key required
|
||||||
fullwhispermodelpath = "" #if empty, it's not initialized
|
fullwhispermodelpath = "" #if empty, it's not initialized
|
||||||
ttsmodelpath = "" #if empty, not initialized
|
ttsmodelpath = "" #if empty, not initialized
|
||||||
|
@ -106,6 +105,7 @@ modelfile_extracted_meta = None
|
||||||
importvars_in_progress = False
|
importvars_in_progress = False
|
||||||
has_multiplayer = False
|
has_multiplayer = False
|
||||||
has_audio_support = False
|
has_audio_support = False
|
||||||
|
has_vision_support = False
|
||||||
savedata_obj = None
|
savedata_obj = None
|
||||||
multiplayer_story_data_compressed = None #stores the full compressed story of the current multiplayer session
|
multiplayer_story_data_compressed = None #stores the full compressed story of the current multiplayer session
|
||||||
multiplayer_turn_major = 1 # to keep track of when a client needs to sync their stories
|
multiplayer_turn_major = 1 # to keep track of when a client needs to sync their stories
|
||||||
|
@ -539,6 +539,7 @@ def init_library():
|
||||||
handle.get_stream_count.restype = ctypes.c_int
|
handle.get_stream_count.restype = ctypes.c_int
|
||||||
handle.has_finished.restype = ctypes.c_bool
|
handle.has_finished.restype = ctypes.c_bool
|
||||||
handle.has_audio_support.restype = ctypes.c_bool
|
handle.has_audio_support.restype = ctypes.c_bool
|
||||||
|
handle.has_vision_support.restype = ctypes.c_bool
|
||||||
handle.get_last_eval_time.restype = ctypes.c_float
|
handle.get_last_eval_time.restype = ctypes.c_float
|
||||||
handle.get_last_process_time.restype = ctypes.c_float
|
handle.get_last_process_time.restype = ctypes.c_float
|
||||||
handle.get_last_token_count.restype = ctypes.c_int
|
handle.get_last_token_count.restype = ctypes.c_int
|
||||||
|
@ -891,10 +892,9 @@ def convert_json_to_gbnf(json_obj):
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def get_capabilities():
|
def get_capabilities():
|
||||||
global savedata_obj, has_multiplayer, KcppVersion, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, has_audio_support
|
global savedata_obj, has_multiplayer, KcppVersion, friendlymodelname, friendlysdmodelname, fullsdmodelpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, has_audio_support, has_vision_support
|
||||||
has_llm = not (friendlymodelname=="inactive")
|
has_llm = not (friendlymodelname=="inactive")
|
||||||
has_txt2img = not (friendlysdmodelname=="inactive" or fullsdmodelpath=="")
|
has_txt2img = not (friendlysdmodelname=="inactive" or fullsdmodelpath=="")
|
||||||
has_vision = (mmprojpath!="")
|
|
||||||
has_password = (password!="")
|
has_password = (password!="")
|
||||||
has_whisper = (fullwhispermodelpath!="")
|
has_whisper = (fullwhispermodelpath!="")
|
||||||
has_search = True if args.websearch else False
|
has_search = True if args.websearch else False
|
||||||
|
@ -902,7 +902,7 @@ def get_capabilities():
|
||||||
has_embeddings = (embeddingsmodelpath!="")
|
has_embeddings = (embeddingsmodelpath!="")
|
||||||
has_guidance = True if args.enableguidance else False
|
has_guidance = True if args.enableguidance else False
|
||||||
admin_type = (2 if args.admin and args.admindir and args.adminpassword else (1 if args.admin and args.admindir else 0))
|
admin_type = (2 if args.admin and args.admindir and args.adminpassword else (1 if args.admin and args.admindir else 0))
|
||||||
return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision,"audio":has_audio_support,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "embeddings":has_embeddings, "savedata":(savedata_obj is not None), "admin": admin_type, "guidance": has_guidance}
|
return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision_support,"audio":has_audio_support,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "embeddings":has_embeddings, "savedata":(savedata_obj is not None), "admin": admin_type, "guidance": has_guidance}
|
||||||
|
|
||||||
def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo
|
def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo
|
||||||
chunk_size = 1024*1024*12 # read first 12mb of file
|
chunk_size = 1024*1024*12 # read first 12mb of file
|
||||||
|
@ -3120,7 +3120,7 @@ Change Mode<br>
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
|
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
|
||||||
global last_req_time, start_time
|
global last_req_time, start_time
|
||||||
global savedata_obj, has_multiplayer, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, maxctx, maxhordelen, friendlymodelname, lastuploadedcomfyimg, lastgeneratedcomfyimg, KcppVersion, totalgens, preloaded_story, exitcounter, currentusergenkey, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, friendlyembeddingsmodelname
|
global savedata_obj, has_multiplayer, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, maxctx, maxhordelen, friendlymodelname, lastuploadedcomfyimg, lastgeneratedcomfyimg, KcppVersion, totalgens, preloaded_story, exitcounter, currentusergenkey, friendlysdmodelname, fullsdmodelpath, password, friendlyembeddingsmodelname
|
||||||
self.path = self.path.rstrip('/')
|
self.path = self.path.rstrip('/')
|
||||||
response_body = None
|
response_body = None
|
||||||
content_type = 'application/json'
|
content_type = 'application/json'
|
||||||
|
@ -3370,7 +3370,7 @@ Change Mode<br>
|
||||||
return
|
return
|
||||||
|
|
||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
global modelbusy, requestsinqueue, currentusergenkey, totalgens, pendingabortkey, lastuploadedcomfyimg, lastgeneratedcomfyimg, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, net_save_slots
|
global modelbusy, requestsinqueue, currentusergenkey, totalgens, pendingabortkey, lastuploadedcomfyimg, lastgeneratedcomfyimg, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, net_save_slots, has_vision_support
|
||||||
contlenstr = self.headers['content-length']
|
contlenstr = self.headers['content-length']
|
||||||
content_length = 0
|
content_length = 0
|
||||||
body = None
|
body = None
|
||||||
|
@ -3846,8 +3846,7 @@ Change Mode<br>
|
||||||
elif self.path.endswith('/v1/chat/completions'):
|
elif self.path.endswith('/v1/chat/completions'):
|
||||||
api_format = 4
|
api_format = 4
|
||||||
elif self.path.endswith('/sdapi/v1/interrogate'):
|
elif self.path.endswith('/sdapi/v1/interrogate'):
|
||||||
has_vision = (mmprojpath!="")
|
if not has_vision_support:
|
||||||
if not has_vision:
|
|
||||||
self.send_response(503)
|
self.send_response(503)
|
||||||
self.end_headers(content_type='application/json')
|
self.end_headers(content_type='application/json')
|
||||||
self.wfile.write(json.dumps({"detail": {
|
self.wfile.write(json.dumps({"detail": {
|
||||||
|
@ -6641,7 +6640,7 @@ def main(launch_args, default_args):
|
||||||
|
|
||||||
def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
||||||
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory, using_gui_launcher
|
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory, using_gui_launcher
|
||||||
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, friendlyembeddingsmodelname, has_audio_support
|
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, friendlyembeddingsmodelname, has_audio_support, has_vision_support
|
||||||
|
|
||||||
start_server = True
|
start_server = True
|
||||||
|
|
||||||
|
@ -6982,9 +6981,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
||||||
exitcounter = 999
|
exitcounter = 999
|
||||||
exit_with_error(2,f"Cannot find mmproj file: {args.mmproj}")
|
exit_with_error(2,f"Cannot find mmproj file: {args.mmproj}")
|
||||||
else:
|
else:
|
||||||
global mmprojpath
|
|
||||||
args.mmproj = os.path.abspath(args.mmproj)
|
args.mmproj = os.path.abspath(args.mmproj)
|
||||||
mmprojpath = args.mmproj
|
|
||||||
|
|
||||||
if not args.blasthreads or args.blasthreads <= 0:
|
if not args.blasthreads or args.blasthreads <= 0:
|
||||||
args.blasthreads = args.threads
|
args.blasthreads = args.threads
|
||||||
|
@ -6998,7 +6995,13 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
||||||
print("WARNING: Selected Text Model does not seem to be a GGUF file! Are you sure you picked the right file?")
|
print("WARNING: Selected Text Model does not seem to be a GGUF file! Are you sure you picked the right file?")
|
||||||
loadok = load_model(modelname)
|
loadok = load_model(modelname)
|
||||||
print("Load Text Model OK: " + str(loadok))
|
print("Load Text Model OK: " + str(loadok))
|
||||||
has_audio_support = handle.has_audio_support() # multimodal audio support is only known at runtime
|
if args.mmproj and args.mmproj!="": # multimodal vision and audio support is only known at runtime
|
||||||
|
has_audio_support = handle.has_audio_support()
|
||||||
|
has_vision_support = handle.has_vision_support()
|
||||||
|
else:
|
||||||
|
has_audio_support = False
|
||||||
|
has_vision_support = False
|
||||||
|
|
||||||
if not loadok:
|
if not loadok:
|
||||||
exitcounter = 999
|
exitcounter = 999
|
||||||
exit_with_error(3,"Could not load text model: " + modelname)
|
exit_with_error(3,"Could not load text model: " + modelname)
|
||||||
|
@ -7193,6 +7196,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
||||||
enabledmlist.append("ImageGeneration") if "txt2img" in caps and caps["txt2img"] else disabledmlist.append("ImageGeneration")
|
enabledmlist.append("ImageGeneration") if "txt2img" in caps and caps["txt2img"] else disabledmlist.append("ImageGeneration")
|
||||||
enabledmlist.append("VoiceRecognition") if "transcribe" in caps and caps["transcribe"] else disabledmlist.append("VoiceRecognition")
|
enabledmlist.append("VoiceRecognition") if "transcribe" in caps and caps["transcribe"] else disabledmlist.append("VoiceRecognition")
|
||||||
enabledmlist.append("MultimodalVision") if "vision" in caps and caps["vision"] else disabledmlist.append("MultimodalVision")
|
enabledmlist.append("MultimodalVision") if "vision" in caps and caps["vision"] else disabledmlist.append("MultimodalVision")
|
||||||
|
enabledmlist.append("MultimodalAudio") if "audio" in caps and caps["audio"] else disabledmlist.append("MultimodalAudio")
|
||||||
enabledmlist.append("NetworkMultiplayer") if "multiplayer" in caps and caps["multiplayer"] else disabledmlist.append("NetworkMultiplayer")
|
enabledmlist.append("NetworkMultiplayer") if "multiplayer" in caps and caps["multiplayer"] else disabledmlist.append("NetworkMultiplayer")
|
||||||
enabledmlist.append("ApiKeyPassword") if "protected" in caps and caps["protected"] else disabledmlist.append("ApiKeyPassword")
|
enabledmlist.append("ApiKeyPassword") if "protected" in caps and caps["protected"] else disabledmlist.append("ApiKeyPassword")
|
||||||
enabledmlist.append("WebSearchProxy") if "websearch" in caps and caps["websearch"] else disabledmlist.append("WebSearchProxy")
|
enabledmlist.append("WebSearchProxy") if "websearch" in caps and caps["websearch"] else disabledmlist.append("WebSearchProxy")
|
||||||
|
|
Loading…
Add table
Reference in a new issue