mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
added one more backend for clblast noavx2 and clblast failsafe
This commit is contained in:
parent
898856e183
commit
7a5499e77b
10 changed files with 57 additions and 382 deletions
41
koboldcpp.py
41
koboldcpp.py
|
@ -377,6 +377,7 @@ lib_failsafe = pick_existant_file("koboldcpp_failsafe.dll","koboldcpp_failsafe.s
|
|||
lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so")
|
||||
lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
|
||||
lib_clblast_noavx2 = pick_existant_file("koboldcpp_clblast_noavx2.dll","koboldcpp_clblast_noavx2.so")
|
||||
lib_clblast_failsafe = pick_existant_file("koboldcpp_clblast_failsafe.dll","koboldcpp_clblast_failsafe.so")
|
||||
lib_cublas = pick_existant_file("koboldcpp_cublas.dll","koboldcpp_cublas.so")
|
||||
lib_hipblas = pick_existant_file("koboldcpp_hipblas.dll","koboldcpp_hipblas.so")
|
||||
lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so")
|
||||
|
@ -384,26 +385,30 @@ lib_vulkan_noavx2 = pick_existant_file("koboldcpp_vulkan_noavx2.dll","koboldcpp_
|
|||
libname = ""
|
||||
lib_option_pairs = [
|
||||
(lib_default, "Use CPU"),
|
||||
(lib_clblast, "Use CLBlast"),
|
||||
(lib_cublas, "Use CuBLAS"),
|
||||
(lib_hipblas, "Use hipBLAS (ROCm)"),
|
||||
(lib_vulkan, "Use Vulkan"),
|
||||
(lib_clblast, "Use CLBlast"),
|
||||
(lib_noavx2, "Use CPU (Old CPU)"),
|
||||
(lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
|
||||
(lib_clblast_noavx2, "Use CLBlast (Older CPU)"),
|
||||
(lib_clblast_noavx2, "Use CLBlast (Old CPU)"),
|
||||
(lib_clblast_failsafe, "Use CLBlast (Older CPU)"),
|
||||
(lib_failsafe, "Failsafe Mode (Older CPU)")]
|
||||
default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
|
||||
default_option, cublas_option, hipblas_option, vulkan_option, clblast_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, clblast_failsafe_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
|
||||
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
|
||||
|
||||
def init_library():
|
||||
global handle, args, libname
|
||||
global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2
|
||||
global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_clblast_failsafe,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2
|
||||
|
||||
libname = lib_default
|
||||
|
||||
if args.noavx2:
|
||||
if args.useclblast and file_exists(lib_clblast_noavx2) and (os.name!='nt' or file_exists("clblast.dll")):
|
||||
libname = lib_clblast_noavx2
|
||||
if args.useclblast and (os.name!='nt' or file_exists("clblast.dll")):
|
||||
if (args.failsafe) and file_exists(lib_clblast_failsafe):
|
||||
libname = lib_clblast_failsafe
|
||||
elif file_exists(lib_clblast_noavx2):
|
||||
libname = lib_clblast_noavx2
|
||||
elif (args.usevulkan is not None) and file_exists(lib_vulkan_noavx2):
|
||||
libname = lib_vulkan_noavx2
|
||||
elif (args.failsafe) and file_exists(lib_failsafe):
|
||||
|
@ -3425,7 +3430,7 @@ def show_gui():
|
|||
# backend count label with the tooltip function
|
||||
nl = '\n'
|
||||
tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
|
||||
num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt)
|
||||
num_backends_built = makelabel(parent, str(len(runopts)) + "/9", 5, 2,tooltxt)
|
||||
num_backends_built.grid(row=1, column=1, padx=205, pady=0)
|
||||
num_backends_built.configure(text_color="#00ff00")
|
||||
|
||||
|
@ -3446,7 +3451,7 @@ def show_gui():
|
|||
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
|
||||
max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
|
||||
index = runopts_var.get()
|
||||
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
|
||||
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
|
||||
layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
||||
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
||||
if sys.platform=="darwin" and gpulayers_var.get()=="-1":
|
||||
|
@ -3477,7 +3482,7 @@ def show_gui():
|
|||
if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)":
|
||||
quick_gpuname_label.configure(text=VKDevicesNames[s])
|
||||
gpuname_label.configure(text=VKDevicesNames[s])
|
||||
elif v == "Use CLBlast" or v == "Use CLBlast (Older CPU)":
|
||||
elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)" or v == "Use CLBlast (Older CPU)":
|
||||
quick_gpuname_label.configure(text=CLDevicesNames[s])
|
||||
gpuname_label.configure(text=CLDevicesNames[s])
|
||||
else:
|
||||
|
@ -3534,12 +3539,12 @@ def show_gui():
|
|||
global runmode_untouched
|
||||
runmode_untouched = False
|
||||
index = runopts_var.get()
|
||||
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
||||
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
||||
quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
||||
gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
||||
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||
if index == "Use CLBlast" or index == "Use CLBlast (Older CPU)":
|
||||
if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)":
|
||||
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||
CUDA_gpu_selector_box.grid_remove()
|
||||
|
@ -3583,7 +3588,7 @@ def show_gui():
|
|||
else:
|
||||
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
|
||||
|
||||
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
||||
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
||||
gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
||||
gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw")
|
||||
quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
||||
|
@ -3954,10 +3959,13 @@ def show_gui():
|
|||
args.noavx2 = False
|
||||
if gpu_choice_var.get()!="All":
|
||||
gpuchoiceidx = int(gpu_choice_var.get())-1
|
||||
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Older CPU)":
|
||||
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)" or runopts_var.get() == "Use CLBlast (Older CPU)":
|
||||
args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
|
||||
if runopts_var.get() == "Use CLBlast (Older CPU)":
|
||||
if runopts_var.get() == "Use CLBlast (Old CPU)":
|
||||
args.noavx2 = True
|
||||
elif runopts_var.get() == "Use CLBlast (Older CPU)":
|
||||
args.noavx2 = True
|
||||
args.failsafe = True
|
||||
if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
|
||||
if gpu_choice_var.get()=="All":
|
||||
args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"]
|
||||
|
@ -4926,6 +4934,9 @@ def main(launch_args,start_server=True):
|
|||
if args.quantkv and args.quantkv>0 and not args.flashattention:
|
||||
exit_with_error(1, "Error: Using --quantkv requires --flashattention")
|
||||
|
||||
if args.failsafe: #failsafe implies noavx2
|
||||
args.noavx2 = True
|
||||
|
||||
if not args.model_param:
|
||||
args.model_param = args.model
|
||||
|
||||
|
@ -5596,7 +5607,7 @@ if __name__ == '__main__':
|
|||
compatgroup3.add_argument("--usemmap", help="If set, uses mmap to load model. This model will not be unloadable.", action='store_true')
|
||||
advparser.add_argument("--usemlock", help="Enables mlock, preventing the RAM used to load the model from being paged out. Not usually recommended.", action='store_true')
|
||||
advparser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices.", action='store_true')
|
||||
advparser.add_argument("--failsafe", help="Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices.", action='store_true')
|
||||
advparser.add_argument("--failsafe", help="Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices. Can be combined with useclblast if your device supports OpenCL.", action='store_true')
|
||||
advparser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", nargs='?', const=1, type=int, default=0)
|
||||
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
|
||||
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue