From 813cf829b5bdbaab1d77cef51b3488fb215c4b27 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 6 Jun 2024 18:36:56 +0800 Subject: [PATCH] allow selecting multigpu on vulkan --- expose.cpp | 8 +++--- klite.embd | 32 ++++++++++++++++++++++-- koboldcpp.py | 23 ++++++++++------- otherarch/sdcpp/sdtype_adapter.cpp | 7 +++--- otherarch/whispercpp/whisper_adapter.cpp | 7 +++--- 5 files changed, 53 insertions(+), 24 deletions(-) diff --git a/expose.cpp b/expose.cpp index 89089844f..7fec5cea9 100644 --- a/expose.cpp +++ b/expose.cpp @@ -68,14 +68,12 @@ extern "C" vulkan_info_str += ","; } } - if(vulkan_info_str=="") + if(vulkan_info_str!="") { - vulkan_info_str = "0"; + vulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str; + putenv((char*)vulkandeviceenv.c_str()); } - vulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str; - putenv((char*)vulkandeviceenv.c_str()); - executable_path = inputs.executable_path; if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3 || file_format==FileFormat::GPTJ_4 || file_format==FileFormat::GPTJ_5) diff --git a/klite.embd b/klite.embd index 780260405..0a2d18092 100644 --- a/klite.embd +++ b/klite.embd @@ -9084,6 +9084,10 @@ Current version: 145 st = "<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\n"; et = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n"; break; + case "10": // Phi-3 Mini + st = "<|end|><|user|>\\n"; + et = "<|end|>\\n<|assistant|>"; + break; default: break; } @@ -10212,6 +10216,8 @@ Current version: 145 } } + var ptt_start_timestamp = performance.now(); + var recent_voice_duration = 0; function ptt_start() { if(voice_typing_mode>0) @@ -10228,6 +10234,7 @@ Current version: 145 } voice_is_recording = true; update_submit_button(false); + ptt_start_timestamp = performance.now(); } } } @@ -10248,13 +10255,25 @@ Current version: 145 preaudioblobs.push(new Blob([preaudiobuffers[i]], { type: 'audio/webm' })); } } + recent_voice_duration = performance.now() - ptt_start_timestamp; if (voicerecorder.state !== "inactive") { voicerecorder.stop(); } voice_is_recording = false; update_submit_button(false); + if(recent_voice_duration<500) //if too short, fall back to click behavior + { + if(is_aesthetic_ui()) + { + chat_submit_generation(); + } + else + { + submit_generation(); + } + } } - }, 500); //prevent premature stopping + }, 280); //prevent premature stopping } } function submit_generation_button(aesthetic_ui) @@ -12750,6 +12769,12 @@ Current version: 145 let completeRecording = new Blob([e.data], { type: 'audio/webm' }); let audiodatareader = new window.FileReader(); + if(recent_voice_duration<550) + { + console.log("Skip too short speech: " + recent_voice_duration); + return; //too short, don't process this + } + if(preaudioblobs.length<2) { audioBlobToDecodedAudioBuffer(completeRecording,(buffer)=>{ @@ -16016,6 +16041,7 @@ Current version: 145 +
Sys. Prompt ?A system pre-prompt sent at the very start to guide the AI behavior. Usually NOT needed.
@@ -16265,7 +16291,7 @@ Current version: 145
-
Speech Control ?Speech Ctrl. ?Requires KoboldCpp with Whisper model loaded. Enables Speech-To-Text voice input. Automatically listens for speech in 'On' mode (Voice Detection), or use Push-To-Talk (PTT).
+ + diff --git a/koboldcpp.py b/koboldcpp.py index 03763c550..eaf3950a5 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -365,15 +365,13 @@ def set_backend_props(inputs): elif (args.usecublas and "3" in args.usecublas): inputs.cublas_info = 3 - if args.usevulkan: + if args.usevulkan: #is an empty array if using vulkan without defined gpu s = "" for l in range(0,len(args.usevulkan)): s += str(args.usevulkan[l]) - if s=="": - s = "0" inputs.vulkan_info = s.encode("UTF-8") else: - inputs.vulkan_info = "0".encode("UTF-8") + inputs.vulkan_info = "".encode("UTF-8") return inputs def end_trim_to_sentence(input_text): @@ -2151,12 +2149,16 @@ def show_new_gui(): gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") - if index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)": + if index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)": gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") + CUDA_gpu_selector_box.grid_remove() + CUDA_quick_gpu_selector_box.grid_remove() if gpu_choice_var.get()=="All": gpu_choice_var.set("1") - elif index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + elif index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + gpu_selector_box.grid_remove() + quick_gpu_selector_box.grid_remove() CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") CUDA_quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") else: @@ -2455,7 +2457,10 @@ def show_new_gui(): if rowsplit_var.get()==1: args.usecublas.append("rowsplit") if runopts_var.get() == "Use Vulkan" or runopts_var.get() == "Vulkan NoAVX2 (Old CPU)": - args.usevulkan = [int(gpuchoiceidx)] + if gpu_choice_var.get()=="All": + args.usevulkan = [] + else: + args.usevulkan = [int(gpuchoiceidx)] if runopts_var.get() == "Vulkan NoAVX2 (Old CPU)": args.noavx2 = True if gpulayers_var.get(): @@ -2581,7 +2586,7 @@ def show_new_gui(): if "noavx2" in dict and dict["noavx2"]: if vulkan_noavx2_option is not None: runopts_var.set(vulkan_noavx2_option) - gpu_choice_var.set("1") + gpu_choice_var.set("All") for opt in range(0,4): if opt in dict["usevulkan"]: gpu_choice_var.set(str(opt+1)) @@ -2589,7 +2594,7 @@ def show_new_gui(): else: if vulkan_option is not None: runopts_var.set(vulkan_option) - gpu_choice_var.set("1") + gpu_choice_var.set("All") for opt in range(0,4): if opt in dict["usevulkan"]: gpu_choice_var.set(str(opt+1)) diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index 1b0fde5b9..bba8b3b63 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -188,12 +188,11 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { vulkan_info_str += ","; } } - if(vulkan_info_str=="") + if(vulkan_info_str!="") { - vulkan_info_str = "0"; + sdvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str; + putenv((char*)sdvulkandeviceenv.c_str()); } - sdvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str; - putenv((char*)sdvulkandeviceenv.c_str()); sd_params = new SDParams(); sd_params->model_path = inputs.model_filename; diff --git a/otherarch/whispercpp/whisper_adapter.cpp b/otherarch/whispercpp/whisper_adapter.cpp index 819081d02..8ee9d5ae6 100644 --- a/otherarch/whispercpp/whisper_adapter.cpp +++ b/otherarch/whispercpp/whisper_adapter.cpp @@ -159,12 +159,11 @@ bool whispertype_load_model(const whisper_load_model_inputs inputs) vulkan_info_str += ","; } } - if(vulkan_info_str=="") + if(vulkan_info_str!="") { - vulkan_info_str = "0"; + whispervulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str; + putenv((char*)whispervulkandeviceenv.c_str()); } - whispervulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str; - putenv((char*)whispervulkandeviceenv.c_str()); std::string modelfile = inputs.model_filename;