Added one more backend, CLBlast failsafe, alongside the existing CLBlast noavx2 backend

This commit is contained in:
Concedo 2025-01-30 22:47:22 +08:00
parent 898856e183
commit 7a5499e77b
10 changed files with 57 additions and 382 deletions

View file

@ -377,6 +377,7 @@ lib_failsafe = pick_existant_file("koboldcpp_failsafe.dll","koboldcpp_failsafe.s
lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so")
lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
lib_clblast_noavx2 = pick_existant_file("koboldcpp_clblast_noavx2.dll","koboldcpp_clblast_noavx2.so")
lib_clblast_failsafe = pick_existant_file("koboldcpp_clblast_failsafe.dll","koboldcpp_clblast_failsafe.so")
lib_cublas = pick_existant_file("koboldcpp_cublas.dll","koboldcpp_cublas.so")
lib_hipblas = pick_existant_file("koboldcpp_hipblas.dll","koboldcpp_hipblas.so")
lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so")
@ -384,26 +385,30 @@ lib_vulkan_noavx2 = pick_existant_file("koboldcpp_vulkan_noavx2.dll","koboldcpp_
libname = ""
lib_option_pairs = [
(lib_default, "Use CPU"),
(lib_clblast, "Use CLBlast"),
(lib_cublas, "Use CuBLAS"),
(lib_hipblas, "Use hipBLAS (ROCm)"),
(lib_vulkan, "Use Vulkan"),
(lib_clblast, "Use CLBlast"),
(lib_noavx2, "Use CPU (Old CPU)"),
(lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
(lib_clblast_noavx2, "Use CLBlast (Older CPU)"),
(lib_clblast_noavx2, "Use CLBlast (Old CPU)"),
(lib_clblast_failsafe, "Use CLBlast (Older CPU)"),
(lib_failsafe, "Failsafe Mode (Older CPU)")]
default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
default_option, cublas_option, hipblas_option, vulkan_option, clblast_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, clblast_failsafe_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
def init_library():
global handle, args, libname
global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2
global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_clblast_failsafe,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2
libname = lib_default
if args.noavx2:
if args.useclblast and file_exists(lib_clblast_noavx2) and (os.name!='nt' or file_exists("clblast.dll")):
libname = lib_clblast_noavx2
if args.useclblast and (os.name!='nt' or file_exists("clblast.dll")):
if (args.failsafe) and file_exists(lib_clblast_failsafe):
libname = lib_clblast_failsafe
elif file_exists(lib_clblast_noavx2):
libname = lib_clblast_noavx2
elif (args.usevulkan is not None) and file_exists(lib_vulkan_noavx2):
libname = lib_vulkan_noavx2
elif (args.failsafe) and file_exists(lib_failsafe):
@ -3425,7 +3430,7 @@ def show_gui():
# backend count label with the tooltip function
nl = '\n'
tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt)
num_backends_built = makelabel(parent, str(len(runopts)) + "/9", 5, 2,tooltxt)
num_backends_built.grid(row=1, column=1, padx=205, pady=0)
num_backends_built.configure(text_color="#00ff00")
@ -3446,7 +3451,7 @@ def show_gui():
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
index = runopts_var.get()
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
if sys.platform=="darwin" and gpulayers_var.get()=="-1":
@ -3477,7 +3482,7 @@ def show_gui():
if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)":
quick_gpuname_label.configure(text=VKDevicesNames[s])
gpuname_label.configure(text=VKDevicesNames[s])
elif v == "Use CLBlast" or v == "Use CLBlast (Older CPU)":
elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)" or v == "Use CLBlast (Older CPU)":
quick_gpuname_label.configure(text=CLDevicesNames[s])
gpuname_label.configure(text=CLDevicesNames[s])
else:
@ -3534,12 +3539,12 @@ def show_gui():
global runmode_untouched
runmode_untouched = False
index = runopts_var.get()
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
if index == "Use CLBlast" or index == "Use CLBlast (Older CPU)":
if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)":
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
CUDA_gpu_selector_box.grid_remove()
@ -3583,7 +3588,7 @@ def show_gui():
else:
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw")
quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
@ -3954,10 +3959,13 @@ def show_gui():
args.noavx2 = False
if gpu_choice_var.get()!="All":
gpuchoiceidx = int(gpu_choice_var.get())-1
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Older CPU)":
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)" or runopts_var.get() == "Use CLBlast (Older CPU)":
args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
if runopts_var.get() == "Use CLBlast (Older CPU)":
if runopts_var.get() == "Use CLBlast (Old CPU)":
args.noavx2 = True
elif runopts_var.get() == "Use CLBlast (Older CPU)":
args.noavx2 = True
args.failsafe = True
if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
if gpu_choice_var.get()=="All":
args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"]
@ -4926,6 +4934,9 @@ def main(launch_args,start_server=True):
if args.quantkv and args.quantkv>0 and not args.flashattention:
exit_with_error(1, "Error: Using --quantkv requires --flashattention")
if args.failsafe: #failsafe implies noavx2
args.noavx2 = True
if not args.model_param:
args.model_param = args.model
@ -5596,7 +5607,7 @@ if __name__ == '__main__':
compatgroup3.add_argument("--usemmap", help="If set, uses mmap to load model. This model will not be unloadable.", action='store_true')
advparser.add_argument("--usemlock", help="Enables mlock, preventing the RAM used to load the model from being paged out. Not usually recommended.", action='store_true')
advparser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices.", action='store_true')
advparser.add_argument("--failsafe", help="Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices.", action='store_true')
advparser.add_argument("--failsafe", help="Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices. Can be combined with useclblast if your device supports OpenCL.", action='store_true')
advparser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", nargs='?', const=1, type=int, default=0)
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)