mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Implemented Kcpp Launch Templates (+1 squashed commits)
Squashed commits: [5ea4c1de] wip integrating skcpps templates (+1 squashed commits) Squashed commits: [737daa7f] skcpps wip
This commit is contained in:
parent
8ccc0144d2
commit
ad86b1aeb8
1 changed files with 100 additions and 48 deletions
148
koboldcpp.py
148
koboldcpp.py
|
@ -221,6 +221,16 @@ def getabspath():
|
||||||
def file_exists(filename):
    """Return True when `filename` exists under the directory returned by getdirpath()."""
    candidate = os.path.join(getdirpath(), filename)
    return os.path.exists(candidate)
|
||||||
|
|
||||||
|
def get_default_threads():
    """Pick a default worker thread count for the current machine.

    Half of os.cpu_count() is used as the core limit. Up to three cores are
    used as-is; above that one core is left free (never dropping below 3).
    When platform.processor() mentions Intel the result is capped at 8.

    Returns:
        int: the suggested thread count, always at least 1.
    """
    # os.cpu_count() may return None when the count cannot be determined.
    cpu_total = os.cpu_count()
    physical_core_limit = 1
    if cpu_total is not None and cpu_total > 1:
        physical_core_limit = cpu_total // 2

    if physical_core_limit <= 3:
        default_threads = physical_core_limit
    else:
        default_threads = max(3, physical_core_limit - 1)

    if 'Intel' in platform.processor():
        default_threads = min(8, default_threads)  # this helps avoid e-cores.
    return default_threads
|
||||||
|
|
||||||
def pick_existant_file(ntoption,nonntoption):
|
def pick_existant_file(ntoption,nonntoption):
|
||||||
precompiled_prefix = "precompiled_"
|
precompiled_prefix = "precompiled_"
|
||||||
ntexist = file_exists(ntoption)
|
ntexist = file_exists(ntoption)
|
||||||
|
@ -251,6 +261,19 @@ lib_hipblas = pick_existant_file("koboldcpp_hipblas.dll","koboldcpp_hipblas.so")
|
||||||
lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so")
|
lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so")
|
||||||
lib_vulkan_noavx2 = pick_existant_file("koboldcpp_vulkan_noavx2.dll","koboldcpp_vulkan_noavx2.so")
|
lib_vulkan_noavx2 = pick_existant_file("koboldcpp_vulkan_noavx2.dll","koboldcpp_vulkan_noavx2.so")
|
||||||
libname = ""
|
libname = ""
|
||||||
|
# Each entry pairs a backend library filename with the label used for that backend.
lib_option_pairs = [
    (lib_openblas, "Use OpenBLAS"),
    (lib_default, "Use No BLAS"),
    (lib_clblast, "Use CLBlast"),
    (lib_cublas, "Use CuBLAS"),
    (lib_hipblas, "Use hipBLAS (ROCm)"),
    (lib_vulkan, "Use Vulkan"),
    (lib_noavx2, "NoAVX2 Mode (Old CPU)"),
    (lib_clblast_noavx2, "CLBlast NoAVX2 (Old CPU)"),
    (lib_vulkan_noavx2, "Vulkan NoAVX2 (Old CPU)"),
    (lib_failsafe, "Failsafe Mode (Old CPU)")]

# One name per backend, in the same order as lib_option_pairs: the label when
# the backend looks available, otherwise None.
# NOTE(review): the Windows fallback appends ".dll" to the display label, not
# to a library filename - confirm this is intended.
(
    openblas_option,
    default_option,
    clblast_option,
    cublas_option,
    hipblas_option,
    vulkan_option,
    noavx2_option,
    clblast_noavx2_option,
    vulkan_noavx2_option,
    failsafe_option,
) = (
    label if file_exists(libfile) or (os.name == 'nt' and file_exists(label + ".dll")) else None
    for libfile, label in lib_option_pairs
)

# Labels of the backends whose library file is present.
runopts = [label for libfile, label in lib_option_pairs if file_exists(libfile)]
|
||||||
|
|
||||||
def init_library():
|
def init_library():
|
||||||
global handle, args, libname
|
global handle, args, libname
|
||||||
|
@ -672,6 +695,18 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
||||||
pass
|
pass
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def auto_set_backend_cli():
    """Select a GPU backend on `args` after a .kcppt template was loaded.

    Calls fetch_gpu_properties(False, True, True) and then, unless
    exitcounter has reached 100:

    * sets args.usecublas to ["normal", "mmq"] when MaxMemory[0] exceeds
      3500000000, a device name was reported in CUDevicesNames and a
      CuBLAS or hipBLAS library is listed in runopts;
    * otherwise sets args.usevulkan to [] when VKIsDGPU contains a 1 and
      Vulkan is listed in runopts.

    `args` is left untouched when neither case applies.
    """
    print("\nA .kcppt template was selected - automatically selecting your backend...\n")
    fetch_gpu_properties(False,True,True)
    if exitcounter >= 100:
        return

    if (MaxMemory[0] > 3500000000
            and (("Use CuBLAS" in runopts and CUDevicesNames[0] != "") or "Use hipBLAS (ROCm)" in runopts)
            and any(CUDevicesNames)):
        # The condition above already guarantees one of the two libraries is
        # in runopts, so no second membership check is needed.
        args.usecublas = ["normal", "mmq"]
    elif (1 in VKIsDGPU) and "Use Vulkan" in runopts:
        # Membership was just established; scanning for the entry adds nothing.
        args.usevulkan = []
|
||||||
|
|
||||||
def load_model(model_filename):
|
def load_model(model_filename):
|
||||||
global args
|
global args
|
||||||
inputs = load_model_inputs()
|
inputs = load_model_inputs()
|
||||||
|
@ -2004,8 +2039,8 @@ def show_gui():
|
||||||
args.model_param = askopenfilename(title="Select ggml model .bin or .gguf file or .kcpps config")
|
args.model_param = askopenfilename(title="Select ggml model .bin or .gguf file or .kcpps config")
|
||||||
root.withdraw()
|
root.withdraw()
|
||||||
root.quit()
|
root.quit()
|
||||||
if args.model_param and args.model_param!="" and (args.model_param.lower().endswith('.kcpps') or args.model_param.lower().endswith('.skcpps')):
|
if args.model_param and args.model_param!="" and (args.model_param.lower().endswith('.kcpps') or args.model_param.lower().endswith('.kcppt')):
|
||||||
loadconfigfile(args.model_param)
|
load_config_cli(args.model_param)
|
||||||
if not args.model_param and not args.sdmodel and not args.whispermodel:
|
if not args.model_param and not args.sdmodel and not args.whispermodel:
|
||||||
global exitcounter
|
global exitcounter
|
||||||
exitcounter = 999
|
exitcounter = 999
|
||||||
|
@ -2072,6 +2107,7 @@ def show_gui():
|
||||||
root.bind("<Configure>", on_resize)
|
root.bind("<Configure>", on_resize)
|
||||||
global using_gui_launcher
|
global using_gui_launcher
|
||||||
using_gui_launcher = True
|
using_gui_launcher = True
|
||||||
|
kcpp_exporting_template = False
|
||||||
|
|
||||||
# trigger empty tooltip then remove it
|
# trigger empty tooltip then remove it
|
||||||
def show_tooltip(event, tooltip_text=None):
|
def show_tooltip(event, tooltip_text=None):
|
||||||
|
@ -2097,6 +2133,8 @@ def show_gui():
|
||||||
show_tooltip(None,"") #initialize tooltip objects
|
show_tooltip(None,"") #initialize tooltip objects
|
||||||
hide_tooltip(None)
|
hide_tooltip(None)
|
||||||
|
|
||||||
|
default_threads = get_default_threads()
|
||||||
|
|
||||||
tabs = ctk.CTkFrame(root, corner_radius = 0, width=windowwidth, height=windowheight-50)
|
tabs = ctk.CTkFrame(root, corner_radius = 0, width=windowwidth, height=windowheight-50)
|
||||||
tabs.grid(row=0, stick="nsew")
|
tabs.grid(row=0, stick="nsew")
|
||||||
tabnames= ["Quick Launch", "Hardware", "Tokens", "Model Files", "Network", "Horde Worker","Image Gen","Audio","Extra"]
|
tabnames= ["Quick Launch", "Hardware", "Tokens", "Model Files", "Network", "Horde Worker","Image Gen","Audio","Extra"]
|
||||||
|
@ -2110,23 +2148,10 @@ def show_gui():
|
||||||
tabcontentframe.grid_propagate(False)
|
tabcontentframe.grid_propagate(False)
|
||||||
|
|
||||||
tabcontent = {}
|
tabcontent = {}
|
||||||
lib_option_pairs = [
|
|
||||||
(lib_openblas, "Use OpenBLAS"),
|
|
||||||
(lib_default, "Use No BLAS"),
|
|
||||||
(lib_clblast, "Use CLBlast"),
|
|
||||||
(lib_cublas, "Use CuBLAS"),
|
|
||||||
(lib_hipblas, "Use hipBLAS (ROCm)"),
|
|
||||||
(lib_vulkan, "Use Vulkan"),
|
|
||||||
(lib_noavx2, "NoAVX2 Mode (Old CPU)"),
|
|
||||||
(lib_clblast_noavx2, "CLBlast NoAVX2 (Old CPU)"),
|
|
||||||
(lib_vulkan_noavx2, "Vulkan NoAVX2 (Old CPU)"),
|
|
||||||
(lib_failsafe, "Failsafe Mode (Old CPU)")]
|
|
||||||
openblas_option, default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
|
|
||||||
# slider data
|
# slider data
|
||||||
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
|
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
|
||||||
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
|
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
|
||||||
contextsize_text = ["256", "512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768", "49152", "65536", "98304", "131072"]
|
contextsize_text = ["256", "512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768", "49152", "65536", "98304", "131072"]
|
||||||
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
|
|
||||||
antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
|
antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
|
||||||
quantkv_text = ["F16 (Off)","8-Bit","4-Bit"]
|
quantkv_text = ["F16 (Off)","8-Bit","4-Bit"]
|
||||||
|
|
||||||
|
@ -2198,8 +2223,6 @@ def show_gui():
|
||||||
|
|
||||||
whisper_model_var = ctk.StringVar()
|
whisper_model_var = ctk.StringVar()
|
||||||
|
|
||||||
kcpp_jsonembed_var = ctk.IntVar(value=0)
|
|
||||||
|
|
||||||
def tabbuttonaction(name):
|
def tabbuttonaction(name):
|
||||||
for t in tabcontent:
|
for t in tabcontent:
|
||||||
if name == t:
|
if name == t:
|
||||||
|
@ -2292,10 +2315,15 @@ def show_gui():
|
||||||
|
|
||||||
# decided to follow yellowrose's and kalomaze's suggestions, this function will automatically try to determine GPU identifiers
|
# decided to follow yellowrose's and kalomaze's suggestions, this function will automatically try to determine GPU identifiers
|
||||||
# run in new thread so it doesnt block. does not return anything, instead overwrites specific values and redraws GUI
|
# run in new thread so it doesnt block. does not return anything, instead overwrites specific values and redraws GUI
|
||||||
def auto_set_backend():
|
def auto_set_backend_gui(manual_select=False):
|
||||||
fetch_gpu_properties(True,True,True)
|
|
||||||
#autopick cublas if suitable, requires at least 3.5GB VRAM to auto pick
|
|
||||||
global exitcounter, runmode_untouched
|
global exitcounter, runmode_untouched
|
||||||
|
if manual_select:
|
||||||
|
print("\nA .kcppt template was selected - automatically selecting your backend...\n")
|
||||||
|
runmode_untouched = True
|
||||||
|
fetch_gpu_properties(False,True,True)
|
||||||
|
else:
|
||||||
|
fetch_gpu_properties(True,True,True)
|
||||||
|
#autopick cublas if suitable, requires at least 3.5GB VRAM to auto pick
|
||||||
#we do not want to autoselect hip/cublas if the user has already changed their desired backend!
|
#we do not want to autoselect hip/cublas if the user has already changed their desired backend!
|
||||||
if exitcounter < 100 and MaxMemory[0]>3500000000 and (("Use CuBLAS" in runopts and CUDevicesNames[0]!="") or "Use hipBLAS (ROCm)" in runopts) and (any(CUDevicesNames) or any(CLDevicesNames)) and runmode_untouched:
|
if exitcounter < 100 and MaxMemory[0]>3500000000 and (("Use CuBLAS" in runopts and CUDevicesNames[0]!="") or "Use hipBLAS (ROCm)" in runopts) and (any(CUDevicesNames) or any(CLDevicesNames)) and runmode_untouched:
|
||||||
if "Use CuBLAS" in runopts:
|
if "Use CuBLAS" in runopts:
|
||||||
|
@ -2312,7 +2340,7 @@ def show_gui():
|
||||||
|
|
||||||
def on_picked_model_file(filepath):
|
def on_picked_model_file(filepath):
|
||||||
global gui_layers_untouched
|
global gui_layers_untouched
|
||||||
if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.skcpps'):
|
if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt'):
|
||||||
#load it as a config file instead
|
#load it as a config file instead
|
||||||
with open(filepath, 'r') as f:
|
with open(filepath, 'r') as f:
|
||||||
dict = json.load(f)
|
dict = json.load(f)
|
||||||
|
@ -2678,13 +2706,28 @@ def show_gui():
|
||||||
audio_tab = tabcontent["Audio"]
|
audio_tab = tabcontent["Audio"]
|
||||||
makefileentry(audio_tab, "Whisper Model (Speech-To-Text):", "Select Whisper .bin Model File", whisper_model_var, 1, width=280, filetypes=[("*.bin","*.bin")], tooltiptxt="Select a Whisper .bin model file on disk to be loaded.")
|
makefileentry(audio_tab, "Whisper Model (Speech-To-Text):", "Select Whisper .bin Model File", whisper_model_var, 1, width=280, filetypes=[("*.bin","*.bin")], tooltiptxt="Select a Whisper .bin model file on disk to be loaded.")
|
||||||
|
|
||||||
|
def kcpp_export_template():
|
||||||
|
nonlocal kcpp_exporting_template
|
||||||
|
kcpp_exporting_template = True
|
||||||
|
export_vars()
|
||||||
|
kcpp_exporting_template = False
|
||||||
|
savdict = json.loads(json.dumps(args.__dict__))
|
||||||
|
file_type = [("KoboldCpp LaunchTemplate", "*.kcppt")]
|
||||||
|
savdict["istemplate"] = True
|
||||||
|
filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
|
||||||
|
if filename == None: return
|
||||||
|
file = open(str(filename.name), 'a')
|
||||||
|
file.write(json.dumps(savdict))
|
||||||
|
file.close()
|
||||||
|
pass
|
||||||
|
|
||||||
# extra tab
|
# extra tab
|
||||||
extra_tab = tabcontent["Extra"]
|
extra_tab = tabcontent["Extra"]
|
||||||
makelabel(extra_tab, "Unpack KoboldCpp to a local directory to modify its files.", 1, 0)
|
makelabel(extra_tab, "Unpack KoboldCpp to a local directory to modify its files.", 1, 0)
|
||||||
makelabel(extra_tab, "You can also launch via koboldcpp.py for faster startup.", 2, 0)
|
makelabel(extra_tab, "You can also launch via koboldcpp.py for faster startup.", 2, 0)
|
||||||
ctk.CTkButton(extra_tab , text = "Unpack KoboldCpp To Folder", command = unpack_to_dir ).grid(row=3,column=0, stick="w", padx= 8, pady=2)
|
ctk.CTkButton(extra_tab , text = "Unpack KoboldCpp To Folder", command = unpack_to_dir ).grid(row=3,column=0, stick="w", padx= 8, pady=2)
|
||||||
makecheckbox(extra_tab, "Save launch settings as portable SKCPPS", kcpp_jsonembed_var, 5, tooltiptxt="Portable sharing format.\nEmbeds any selected JSON files directly into skcpps setting files when saving.")
|
makelabel(extra_tab, "Export as launcher .kcppt template (Expert Only)", 4, 0,tooltiptxt="Creates a KoboldCpp launch template for others to use.\nEmbeds JSON files directly into exported file when saving.\nWhen loaded, forces the backend to be automatically determined.\nWarning! Not recommended for beginners!")
|
||||||
|
ctk.CTkButton(extra_tab , text = "Generate LaunchTemplate", command = kcpp_export_template ).grid(row=5,column=0, stick="w", padx= 8, pady=2)
|
||||||
|
|
||||||
# launch
|
# launch
|
||||||
def guilaunch():
|
def guilaunch():
|
||||||
|
@ -2698,6 +2741,7 @@ def show_gui():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def export_vars():
|
def export_vars():
|
||||||
|
nonlocal kcpp_exporting_template
|
||||||
args.threads = int(threads_var.get())
|
args.threads = int(threads_var.get())
|
||||||
args.usemlock = usemlock.get() == 1
|
args.usemlock = usemlock.get() == 1
|
||||||
args.debugmode = debugmode.get()
|
args.debugmode = debugmode.get()
|
||||||
|
@ -2768,7 +2812,7 @@ def show_gui():
|
||||||
|
|
||||||
args.chatcompletionsadapter = None if chatcompletionsadapter_var.get() == "" else chatcompletionsadapter_var.get()
|
args.chatcompletionsadapter = None if chatcompletionsadapter_var.get() == "" else chatcompletionsadapter_var.get()
|
||||||
try:
|
try:
|
||||||
if kcpp_jsonembed_var.get()==1 and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
|
if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
|
||||||
print(f"Embedding chat completions adapter...") # parse and save embedded preload story
|
print(f"Embedding chat completions adapter...") # parse and save embedded preload story
|
||||||
with open(args.chatcompletionsadapter, 'r') as f:
|
with open(args.chatcompletionsadapter, 'r') as f:
|
||||||
args.chatcompletionsadapter = json.load(f)
|
args.chatcompletionsadapter = json.load(f)
|
||||||
|
@ -2779,7 +2823,7 @@ def show_gui():
|
||||||
args.lora = None if lora_var.get() == "" else ([lora_var.get()] if lora_base_var.get()=="" else [lora_var.get(), lora_base_var.get()])
|
args.lora = None if lora_var.get() == "" else ([lora_var.get()] if lora_base_var.get()=="" else [lora_var.get(), lora_base_var.get()])
|
||||||
args.preloadstory = None if preloadstory_var.get() == "" else preloadstory_var.get()
|
args.preloadstory = None if preloadstory_var.get() == "" else preloadstory_var.get()
|
||||||
try:
|
try:
|
||||||
if kcpp_jsonembed_var.get()==1 and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
|
if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
|
||||||
print(f"Embedding preload story...") # parse and save embedded preload story
|
print(f"Embedding preload story...") # parse and save embedded preload story
|
||||||
with open(args.preloadstory, 'r') as f:
|
with open(args.preloadstory, 'r') as f:
|
||||||
args.preloadstory = json.load(f)
|
args.preloadstory = json.load(f)
|
||||||
|
@ -2970,18 +3014,24 @@ def show_gui():
|
||||||
|
|
||||||
whisper_model_var.set(dict["whispermodel"] if ("whispermodel" in dict and dict["whispermodel"]) else "")
|
whisper_model_var.set(dict["whispermodel"] if ("whispermodel" in dict and dict["whispermodel"]) else "")
|
||||||
|
|
||||||
def save_config():
|
if "istemplate" in dict and dict["istemplate"]:
|
||||||
file_type = [("KoboldCpp Settings", "*.kcpps *.skcpps")]
|
auto_set_backend_gui(True)
|
||||||
|
|
||||||
|
def save_config_gui():
|
||||||
|
nonlocal kcpp_exporting_template
|
||||||
|
kcpp_exporting_template = False
|
||||||
|
export_vars()
|
||||||
|
savdict = json.loads(json.dumps(args.__dict__))
|
||||||
|
file_type = [("KoboldCpp Settings", "*.kcpps")]
|
||||||
filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
|
filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
|
||||||
if filename == None: return
|
if filename == None: return
|
||||||
export_vars()
|
|
||||||
file = open(str(filename.name), 'a')
|
file = open(str(filename.name), 'a')
|
||||||
file.write(json.dumps(args.__dict__))
|
file.write(json.dumps(savdict))
|
||||||
file.close()
|
file.close()
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def load_config():
|
def load_config_gui(): #this is used to populate the GUI with a config file, whereas load_config_cli simply overwrites cli args
|
||||||
file_type = [("KoboldCpp Settings", "*.kcpps *.skcpps")]
|
file_type = [("KoboldCpp Settings", "*.kcpps *.kcppt")]
|
||||||
global runmode_untouched
|
global runmode_untouched
|
||||||
runmode_untouched = False
|
runmode_untouched = False
|
||||||
filename = askopenfilename(filetypes=file_type, defaultextension=file_type, initialdir=None)
|
filename = askopenfilename(filetypes=file_type, defaultextension=file_type, initialdir=None)
|
||||||
|
@ -3008,12 +3058,12 @@ def show_gui():
|
||||||
ctk.CTkButton(tabs , text = "Launch", fg_color="#2f8d3c", hover_color="#2faa3c", command = guilaunch, width=80, height = 35 ).grid(row=1,column=1, stick="se", padx= 25, pady=5)
|
ctk.CTkButton(tabs , text = "Launch", fg_color="#2f8d3c", hover_color="#2faa3c", command = guilaunch, width=80, height = 35 ).grid(row=1,column=1, stick="se", padx= 25, pady=5)
|
||||||
|
|
||||||
ctk.CTkButton(tabs , text = "Update", fg_color="#9900cc", hover_color="#aa11dd", command = display_updates, width=90, height = 35 ).grid(row=1,column=0, stick="sw", padx= 5, pady=5)
|
ctk.CTkButton(tabs , text = "Update", fg_color="#9900cc", hover_color="#aa11dd", command = display_updates, width=90, height = 35 ).grid(row=1,column=0, stick="sw", padx= 5, pady=5)
|
||||||
ctk.CTkButton(tabs , text = "Save", fg_color="#084a66", hover_color="#085a88", command = save_config, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 5, pady=5)
|
ctk.CTkButton(tabs , text = "Save", fg_color="#084a66", hover_color="#085a88", command = save_config_gui, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 5, pady=5)
|
||||||
ctk.CTkButton(tabs , text = "Load", fg_color="#084a66", hover_color="#085a88", command = load_config, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 70, pady=5)
|
ctk.CTkButton(tabs , text = "Load", fg_color="#084a66", hover_color="#085a88", command = load_config_gui, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 70, pady=5)
|
||||||
ctk.CTkButton(tabs , text = "Help", fg_color="#992222", hover_color="#bb3333", command = display_help, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 135, pady=5)
|
ctk.CTkButton(tabs , text = "Help", fg_color="#992222", hover_color="#bb3333", command = display_help, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 135, pady=5)
|
||||||
|
|
||||||
# start a thread that tries to get actual gpu names and layer counts
|
# start a thread that tries to get actual gpu names and layer counts
|
||||||
gpuinfo_thread = threading.Thread(target=auto_set_backend)
|
gpuinfo_thread = threading.Thread(target=auto_set_backend_gui)
|
||||||
gpuinfo_thread.start() #submit job in new thread so nothing is waiting
|
gpuinfo_thread.start() #submit job in new thread so nothing is waiting
|
||||||
|
|
||||||
# runs main loop until closed or launch clicked
|
# runs main loop until closed or launch clicked
|
||||||
|
@ -3025,6 +3075,7 @@ def show_gui():
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
else:
|
else:
|
||||||
# processing vars
|
# processing vars
|
||||||
|
kcpp_exporting_template = False
|
||||||
export_vars()
|
export_vars()
|
||||||
|
|
||||||
if not args.model_param and not args.sdmodel and not args.whispermodel:
|
if not args.model_param and not args.sdmodel and not args.whispermodel:
|
||||||
|
@ -3428,12 +3479,15 @@ def unload_libs():
|
||||||
del handle
|
del handle
|
||||||
handle = None
|
handle = None
|
||||||
|
|
||||||
def loadconfigfile(filename):
|
def load_config_cli(filename):
    """Overwrite attributes on `args` with the settings stored in a JSON config file.

    Every top-level key of the file is copied onto `args`. args.istemplate is
    reset to False before copying, so it is only true when the file sets it;
    in that case auto_set_backend_cli() is called afterwards.
    """
    print("Loading .kcpps configuration file...")
    with open(filename, 'r') as config_file:
        loaded_settings = json.load(config_file)

    args.istemplate = False
    for setting_name in loaded_settings:
        setattr(args, setting_name, loaded_settings[setting_name])

    if args.istemplate:
        auto_set_backend_cli()
|
||||||
|
|
||||||
|
|
||||||
def delete_old_pyinstaller():
|
def delete_old_pyinstaller():
|
||||||
|
@ -3487,7 +3541,7 @@ def main(launch_args,start_server=True):
|
||||||
|
|
||||||
if args.config and len(args.config)==1:
|
if args.config and len(args.config)==1:
|
||||||
if isinstance(args.config[0], str) and os.path.exists(args.config[0]):
|
if isinstance(args.config[0], str) and os.path.exists(args.config[0]):
|
||||||
loadconfigfile(args.config[0])
|
load_config_cli(args.config[0])
|
||||||
elif args.ignoremissing:
|
elif args.ignoremissing:
|
||||||
print("Ignoring missing kcpp config file...")
|
print("Ignoring missing kcpp config file...")
|
||||||
else:
|
else:
|
||||||
|
@ -3497,8 +3551,8 @@ def main(launch_args,start_server=True):
|
||||||
args = convert_outdated_args(args)
|
args = convert_outdated_args(args)
|
||||||
|
|
||||||
#positional handling for kcpps files (drag and drop)
|
#positional handling for kcpps files (drag and drop)
|
||||||
if args.model_param and args.model_param!="" and (args.model_param.lower().endswith('.kcpps') or args.model_param.lower().endswith('.skcpps')):
|
if args.model_param and args.model_param!="" and (args.model_param.lower().endswith('.kcpps') or args.model_param.lower().endswith('.kcppt')):
|
||||||
loadconfigfile(args.model_param)
|
load_config_cli(args.model_param)
|
||||||
|
|
||||||
#prevent quantkv from being used without flash attn
|
#prevent quantkv from being used without flash attn
|
||||||
if args.quantkv and args.quantkv>0 and not args.flashattention:
|
if args.quantkv and args.quantkv>0 and not args.flashattention:
|
||||||
|
@ -3665,7 +3719,7 @@ def main(launch_args,start_server=True):
|
||||||
pass
|
pass
|
||||||
elif args.gpulayers==-1 and not shouldavoidgpu and os.path.exists(args.model_param):
|
elif args.gpulayers==-1 and not shouldavoidgpu and os.path.exists(args.model_param):
|
||||||
print("Trying to automatically determine GPU layers...")
|
print("Trying to automatically determine GPU layers...")
|
||||||
if MaxMemory[0] == 0: #try to get gpu vram for cuda
|
if MaxMemory[0] == 0: #try to get gpu vram for cuda if not picked yet
|
||||||
fetch_gpu_properties(False,True,False)
|
fetch_gpu_properties(False,True,False)
|
||||||
pass
|
pass
|
||||||
if MaxMemory[0] > 0:
|
if MaxMemory[0] > 0:
|
||||||
|
@ -3673,6 +3727,10 @@ def main(launch_args,start_server=True):
|
||||||
print(f"Auto Recommended Layers: {layeramt}")
|
print(f"Auto Recommended Layers: {layeramt}")
|
||||||
args.gpulayers = layeramt
|
args.gpulayers = layeramt
|
||||||
|
|
||||||
|
if args.threads == -1:
|
||||||
|
args.threads = get_default_threads()
|
||||||
|
print(f"Auto Set Threads: {args.threads}")
|
||||||
|
|
||||||
init_library() # Note: if blas does not exist and is enabled, program will crash.
|
init_library() # Note: if blas does not exist and is enabled, program will crash.
|
||||||
print("==========")
|
print("==========")
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
@ -3988,14 +4046,8 @@ if __name__ == '__main__':
|
||||||
parser.add_argument("--host", metavar=('[ipaddr]'), help="Host IP to listen on. If empty, all routable interfaces are accepted.", default="")
|
parser.add_argument("--host", metavar=('[ipaddr]'), help="Host IP to listen on. If empty, all routable interfaces are accepted.", default="")
|
||||||
parser.add_argument("--launch", help="Launches a web browser when load is completed.", action='store_true')
|
parser.add_argument("--launch", help="Launches a web browser when load is completed.", action='store_true')
|
||||||
parser.add_argument("--config", metavar=('[filename]'), help="Load settings from a .kcpps file. Other arguments will be ignored", type=str, nargs=1)
|
parser.add_argument("--config", metavar=('[filename]'), help="Load settings from a .kcpps file. Other arguments will be ignored", type=str, nargs=1)
|
||||||
physical_core_limit = 1
|
|
||||||
if os.cpu_count()!=None and os.cpu_count()>1:
|
parser.add_argument("--threads", metavar=('[threads]'), help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=get_default_threads())
|
||||||
physical_core_limit = os.cpu_count() // 2
|
|
||||||
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
|
|
||||||
processor = platform.processor()
|
|
||||||
if 'Intel' in processor:
|
|
||||||
default_threads = (8 if default_threads > 8 else default_threads) #this helps avoid e-cores.
|
|
||||||
parser.add_argument("--threads", metavar=('[threads]'), help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
|
|
||||||
compatgroup = parser.add_mutually_exclusive_group()
|
compatgroup = parser.add_mutually_exclusive_group()
|
||||||
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq', 'rowsplit'])
|
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq', 'rowsplit'])
|
||||||
compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify GPU Device ID (e.g. --usevulkan 0).", metavar=('[Device ID]'), nargs='*', type=int, default=None)
|
compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify GPU Device ID (e.g. --usevulkan 0).", metavar=('[Device ID]'), nargs='*', type=int, default=None)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue