From 813cf829b5bdbaab1d77cef51b3488fb215c4b27 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 6 Jun 2024 18:36:56 +0800
Subject: [PATCH] allow selecting multigpu on vulkan

---
 expose.cpp                               |  8 +++---
 klite.embd                               | 32 ++++++++++++++++++++++--
 koboldcpp.py                             | 23 ++++++++++-------
 otherarch/sdcpp/sdtype_adapter.cpp       |  7 +++---
 otherarch/whispercpp/whisper_adapter.cpp |  7 +++---
 5 files changed, 53 insertions(+), 24 deletions(-)
diff --git a/expose.cpp b/expose.cpp
index 89089844f..7fec5cea9 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -68,14 +68,12 @@ extern "C"
                 vulkan_info_str += ",";
             }
         }
-        if(vulkan_info_str=="")
+        if(vulkan_info_str!="")
         {
-            vulkan_info_str = "0";
+            vulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
+            putenv((char*)vulkandeviceenv.c_str());
         }
 
-        vulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
-        putenv((char*)vulkandeviceenv.c_str());
-
         executable_path = inputs.executable_path;
 
         if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3 || file_format==FileFormat::GPTJ_4  || file_format==FileFormat::GPTJ_5)
diff --git a/klite.embd b/klite.embd
index 780260405..0a2d18092 100644
--- a/klite.embd
+++ b/klite.embd
@@ -9084,6 +9084,10 @@ Current version: 145
 				st = "<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\n";
 				et = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n";
 				break;
+			case "10": // Phi-3 Mini
+                st = "<|end|><|user|>\\n";
+                et = "<|end|>\\n<|assistant|>";
+                break;
 			default:
 				break;
 		}
@@ -10212,6 +10216,8 @@ Current version: 145
 		}
 	}
 
+	var ptt_start_timestamp = performance.now();
+	var recent_voice_duration = 0;
 	function ptt_start()
 	{
 		if(voice_typing_mode>0)
@@ -10228,6 +10234,7 @@ Current version: 145
 				}
 				voice_is_recording = true;
 				update_submit_button(false);
+				ptt_start_timestamp = performance.now();
 			}
 		}
 	}
@@ -10248,13 +10255,25 @@ Current version: 145
 							preaudioblobs.push(new Blob([preaudiobuffers[i]], { type: 'audio/webm' }));
 						}
 					}
+					recent_voice_duration = performance.now() - ptt_start_timestamp;
 					if (voicerecorder.state !== "inactive") {
 						voicerecorder.stop();
 					}
 					voice_is_recording = false;
 					update_submit_button(false);
+					if(recent_voice_duration<500) //if too short, fall back to click behavior
+					{
+						if(is_aesthetic_ui())
+						{
+							chat_submit_generation();
+						}
+						else
+						{
+							submit_generation();
+						}
+					}
 				}
-			}, 500); //prevent premature stopping
+			}, 280); //prevent premature stopping
 		}
 	}
 	function submit_generation_button(aesthetic_ui)
@@ -12750,6 +12769,12 @@ Current version: 145
 			let completeRecording = new Blob([e.data], { type: 'audio/webm' });
 			let audiodatareader = new window.FileReader();
 
+			if(recent_voice_duration<550)
+			{
+				console.log("Skip too short speech: " + recent_voice_duration);
+				return; //too short, don't process this
+			}
+
 			if(preaudioblobs.length<2)
 			{
 				audioBlobToDecodedAudioBuffer(completeRecording,(buffer)=>{
@@ -16016,6 +16041,7 @@ Current version: 145
 								<option value="7">KoboldAI Format</option>
 								<option value="8">CommandR</option>
 								<option value="9">Llama 3 Chat</option>
+								<option value="10">Phi-3</option>
 							</select>
 							<div class="settingsmall miniinput" style="width:100%;padding:2px">
 							<div class="justifyleft settingsmall">Sys. Prompt <span class="helpicon">?<span class="helptext">A system pre-prompt sent at the very start to guide the AI behavior. Usually NOT needed.</span></span></div>
@@ -16265,7 +16291,7 @@ Current version: 145
 						   <input type="checkbox" id="notify_on" style="margin:0px 0 0;">
 						</div>
 						<div class="settinglabel">
-							<div class="justifyleft settingsmall" title="">Speech Control <span class="helpicon">?<span
+							<div class="justifyleft settingsmall" title="">Speech Ctrl. <span class="helpicon">?<span
 							class="helptext">Requires KoboldCpp with Whisper model loaded. Enables Speech-To-Text voice input. Automatically listens for speech in 'On' mode (Voice Detection), or use Push-To-Talk (PTT).</span></span></div>
 							<select style="padding:1px; height:auto; width: 60px; appearance: none; font-size: 6pt; margin:0px 0px 0px auto;" class="form-control" id="voice_typing_mode">
 								<option value="0">Off</option>
@@ -16499,6 +16525,8 @@ Current version: 145
 						class="helptext">Controls how far back in the text to search for World Info Keys</span></span></div>
 				<select style="height:16px;padding:0px;margin:0px 4px 0; width:90px;font-size:10px;" class="form-control" id="wi_searchdepth">
 					<option value="0">Full Context</option>
+					<option value="4096">Last 4096</option>
+					<option value="2048">Last 2048</option>
 					<option value="1024">Last 1024</option>
 					<option value="512">Last 512</option>
 					<option value="256">Last 256</option>
diff --git a/koboldcpp.py b/koboldcpp.py
index 03763c550..eaf3950a5 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -365,15 +365,13 @@ def set_backend_props(inputs):
         elif (args.usecublas and "3" in args.usecublas):
             inputs.cublas_info = 3
 
-    if args.usevulkan:
+    if args.usevulkan: #is an empty array if using vulkan without defined gpu
         s = ""
         for l in range(0,len(args.usevulkan)):
             s += str(args.usevulkan[l])
-        if s=="":
-            s = "0"
         inputs.vulkan_info = s.encode("UTF-8")
     else:
-        inputs.vulkan_info = "0".encode("UTF-8")
+        inputs.vulkan_info = "".encode("UTF-8")
     return inputs
 
 def end_trim_to_sentence(input_text):
@@ -2151,12 +2149,16 @@ def show_new_gui():
             gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
             gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
             quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
-            if index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)":
+            if index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)":
                 gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
                 quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
+                CUDA_gpu_selector_box.grid_remove()
+                CUDA_quick_gpu_selector_box.grid_remove()
                 if gpu_choice_var.get()=="All":
                     gpu_choice_var.set("1")
-            elif index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
+            elif index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
+                gpu_selector_box.grid_remove()
+                quick_gpu_selector_box.grid_remove()
                 CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
                 CUDA_quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
         else:
@@ -2455,7 +2457,10 @@ def show_new_gui():
             if rowsplit_var.get()==1:
                 args.usecublas.append("rowsplit")
         if runopts_var.get() == "Use Vulkan" or runopts_var.get() == "Vulkan NoAVX2 (Old CPU)":
-            args.usevulkan = [int(gpuchoiceidx)]
+            if gpu_choice_var.get()=="All":
+                args.usevulkan = []
+            else:
+                args.usevulkan = [int(gpuchoiceidx)]
             if runopts_var.get() == "Vulkan NoAVX2 (Old CPU)":
                 args.noavx2 = True
         if gpulayers_var.get():
@@ -2581,7 +2586,7 @@ def show_new_gui():
             if "noavx2" in dict and dict["noavx2"]:
                 if vulkan_noavx2_option is not None:
                     runopts_var.set(vulkan_noavx2_option)
-                    gpu_choice_var.set("1")
+                    gpu_choice_var.set("All")
                     for opt in range(0,4):
                         if opt in dict["usevulkan"]:
                             gpu_choice_var.set(str(opt+1))
@@ -2589,7 +2594,7 @@ def show_new_gui():
             else:
                 if vulkan_option is not None:
                     runopts_var.set(vulkan_option)
-                    gpu_choice_var.set("1")
+                    gpu_choice_var.set("All")
                     for opt in range(0,4):
                         if opt in dict["usevulkan"]:
                             gpu_choice_var.set(str(opt+1))
diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp
index 1b0fde5b9..bba8b3b63 100644
--- a/otherarch/sdcpp/sdtype_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@@ -188,12 +188,11 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
             vulkan_info_str += ",";
         }
     }
-    if(vulkan_info_str=="")
+    if(vulkan_info_str!="")
     {
-        vulkan_info_str = "0";
+        sdvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
+        putenv((char*)sdvulkandeviceenv.c_str());
     }
-    sdvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
-    putenv((char*)sdvulkandeviceenv.c_str());
 
     sd_params = new SDParams();
     sd_params->model_path = inputs.model_filename;
diff --git a/otherarch/whispercpp/whisper_adapter.cpp b/otherarch/whispercpp/whisper_adapter.cpp
index 819081d02..8ee9d5ae6 100644
--- a/otherarch/whispercpp/whisper_adapter.cpp
+++ b/otherarch/whispercpp/whisper_adapter.cpp
@@ -159,12 +159,11 @@ bool whispertype_load_model(const whisper_load_model_inputs inputs)
             vulkan_info_str += ",";
         }
     }
-    if(vulkan_info_str=="")
+    if(vulkan_info_str!="")
     {
-        vulkan_info_str = "0";
+        whispervulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
+        putenv((char*)whispervulkandeviceenv.c_str());
     }
-    whispervulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
-    putenv((char*)whispervulkandeviceenv.c_str());
 
 
     std::string modelfile = inputs.model_filename;