mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
fixed some global reference
This commit is contained in:
parent
ff9b4041da
commit
b246d83dca
2 changed files with 59 additions and 77 deletions
|
@ -171,7 +171,7 @@ if (LLAMA_HIPBLAS)
|
||||||
message(STATUS "HIP and hipBLAS found")
|
message(STATUS "HIP and hipBLAS found")
|
||||||
file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu")
|
file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu")
|
||||||
list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu")
|
list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu")
|
||||||
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
|
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu")
|
||||||
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
||||||
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
|
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
|
||||||
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
||||||
|
|
134
koboldcpp.py
134
koboldcpp.py
|
@ -47,6 +47,13 @@ logit_bias_max = 512
|
||||||
dry_seq_break_max = 128
|
dry_seq_break_max = 128
|
||||||
|
|
||||||
# global vars
|
# global vars
|
||||||
|
KcppVersion = "1.83"
|
||||||
|
showdebug = True
|
||||||
|
guimode = False
|
||||||
|
kcpp_instance = None #global running instance
|
||||||
|
global_memory = None
|
||||||
|
using_gui_launcher = False
|
||||||
|
|
||||||
handle = None
|
handle = None
|
||||||
friendlymodelname = "inactive"
|
friendlymodelname = "inactive"
|
||||||
friendlysdmodelname = "inactive"
|
friendlysdmodelname = "inactive"
|
||||||
|
@ -62,9 +69,6 @@ maxhordelen = 400
|
||||||
modelbusy = threading.Lock()
|
modelbusy = threading.Lock()
|
||||||
requestsinqueue = 0
|
requestsinqueue = 0
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.83"
|
|
||||||
showdebug = True
|
|
||||||
guimode = False
|
|
||||||
showsamplerwarning = True
|
showsamplerwarning = True
|
||||||
showmaxctxwarning = True
|
showmaxctxwarning = True
|
||||||
showusedmemwarning = True
|
showusedmemwarning = True
|
||||||
|
@ -100,10 +104,7 @@ start_time = time.time()
|
||||||
last_req_time = time.time()
|
last_req_time = time.time()
|
||||||
last_non_horde_req_time = time.time()
|
last_non_horde_req_time = time.time()
|
||||||
currfinishreason = "null"
|
currfinishreason = "null"
|
||||||
using_gui_launcher = False
|
|
||||||
using_outdated_flags = False
|
|
||||||
kcpp_instance = None #global running instance
|
|
||||||
global_memory = None
|
|
||||||
|
|
||||||
saved_stdout = None
|
saved_stdout = None
|
||||||
saved_stderr = None
|
saved_stderr = None
|
||||||
|
@ -4624,11 +4625,7 @@ def convert_outdated_args(args):
|
||||||
dict = args
|
dict = args
|
||||||
if isinstance(args, argparse.Namespace):
|
if isinstance(args, argparse.Namespace):
|
||||||
dict = vars(args)
|
dict = vars(args)
|
||||||
|
|
||||||
global using_outdated_flags
|
|
||||||
using_outdated_flags = False
|
|
||||||
if "sdconfig" in dict and dict["sdconfig"] and len(dict["sdconfig"])>0:
|
if "sdconfig" in dict and dict["sdconfig"] and len(dict["sdconfig"])>0:
|
||||||
using_outdated_flags = True
|
|
||||||
dict["sdmodel"] = dict["sdconfig"][0]
|
dict["sdmodel"] = dict["sdconfig"][0]
|
||||||
if dict["sdconfig"] and len(dict["sdconfig"]) > 1:
|
if dict["sdconfig"] and len(dict["sdconfig"]) > 1:
|
||||||
dict["sdclamped"] = 512
|
dict["sdclamped"] = 512
|
||||||
|
@ -4637,7 +4634,6 @@ def convert_outdated_args(args):
|
||||||
if dict["sdconfig"] and len(dict["sdconfig"]) > 3:
|
if dict["sdconfig"] and len(dict["sdconfig"]) > 3:
|
||||||
dict["sdquant"] = (True if dict["sdconfig"][3]=="quant" else False)
|
dict["sdquant"] = (True if dict["sdconfig"][3]=="quant" else False)
|
||||||
if "hordeconfig" in dict and dict["hordeconfig"] and dict["hordeconfig"][0]!="":
|
if "hordeconfig" in dict and dict["hordeconfig"] and dict["hordeconfig"][0]!="":
|
||||||
using_outdated_flags = True
|
|
||||||
dict["hordemodelname"] = dict["hordeconfig"][0]
|
dict["hordemodelname"] = dict["hordeconfig"][0]
|
||||||
if len(dict["hordeconfig"]) > 1:
|
if len(dict["hordeconfig"]) > 1:
|
||||||
dict["hordegenlen"] = int(dict["hordeconfig"][1])
|
dict["hordegenlen"] = int(dict["hordeconfig"][1])
|
||||||
|
@ -4648,33 +4644,12 @@ def convert_outdated_args(args):
|
||||||
dict["hordeworkername"] = dict["hordeconfig"][4]
|
dict["hordeworkername"] = dict["hordeconfig"][4]
|
||||||
if "noblas" in dict and dict["noblas"]:
|
if "noblas" in dict and dict["noblas"]:
|
||||||
dict["usecpu"] = True
|
dict["usecpu"] = True
|
||||||
|
|
||||||
if "failsafe" in dict and dict["failsafe"]: #failsafe implies noavx2
|
if "failsafe" in dict and dict["failsafe"]: #failsafe implies noavx2
|
||||||
dict["noavx2"] = True
|
dict["noavx2"] = True
|
||||||
|
|
||||||
if ("model_param" not in dict or not dict["model_param"]) and ("model" in dict and dict["model"]):
|
if ("model_param" not in dict or not dict["model_param"]) and ("model" in dict and dict["model"]):
|
||||||
dict["model_param"] = dict["model"]
|
dict["model_param"] = dict["model"]
|
||||||
|
|
||||||
check_deprecation_warning()
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
def check_deprecation_warning():
|
|
||||||
# slightly naggy warning to encourage people to switch to new flags
|
|
||||||
# if you want you can remove this at your own risk,
|
|
||||||
# but i am not going to troubleshoot or provide support for deprecated flags.
|
|
||||||
global using_outdated_flags
|
|
||||||
if using_outdated_flags:
|
|
||||||
print("\n=== !!! IMPORTANT WARNING !!! ===")
|
|
||||||
print("You are using one or more OUTDATED config files or launch flags!")
|
|
||||||
print("The flags --hordeconfig and --sdconfig have been DEPRECATED, and MAY be REMOVED in future!")
|
|
||||||
print("They will still work for now, but you SHOULD switch to the updated flags instead, to avoid future issues!")
|
|
||||||
print("New flags are: --hordemodelname --hordeworkername --hordekey --hordemaxctx --hordegenlen --sdmodel --sdthreads --sdquant --sdclamped")
|
|
||||||
print("For more information on these flags, please check --help")
|
|
||||||
print(">>> If you are using the GUI launcher, simply re-saving your config again will get rid of this warning.")
|
|
||||||
print("=== !!! IMPORTANT WARNING !!! ===\n")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def setuptunnel(global_memory, has_sd):
|
def setuptunnel(global_memory, has_sd):
|
||||||
# This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
|
# This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
|
||||||
# It should work out of the box on both linux and windows
|
# It should work out of the box on both linux and windows
|
||||||
|
@ -4829,8 +4804,12 @@ def reload_new_config(filename): #for changing config after launch
|
||||||
config = json.load(f)
|
config = json.load(f)
|
||||||
args.istemplate = False
|
args.istemplate = False
|
||||||
for key, value in config.items(): #do not overwrite certain values
|
for key, value in config.items(): #do not overwrite certain values
|
||||||
if key not in ["remotetunnel","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt"]:
|
if key not in ["remotetunnel","showgui","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt","config"]:
|
||||||
setattr(args, key, value)
|
setattr(args, key, value)
|
||||||
|
setattr(args,"showgui",False)
|
||||||
|
setattr(args,"benchmark",False)
|
||||||
|
setattr(args,"prompt","")
|
||||||
|
setattr(args,"config",None)
|
||||||
|
|
||||||
def load_config_cli(filename):
|
def load_config_cli(filename):
|
||||||
print("Loading .kcpps configuration file...")
|
print("Loading .kcpps configuration file...")
|
||||||
|
@ -4941,7 +4920,7 @@ def analyze_gguf_model_wrapper(filename=""):
|
||||||
dumpthread = threading.Thread(target=analyze_gguf_model, args=(args,filename))
|
dumpthread = threading.Thread(target=analyze_gguf_model, args=(args,filename))
|
||||||
dumpthread.start()
|
dumpthread.start()
|
||||||
|
|
||||||
def main(launch_args,start_server=True):
|
def main(launch_args):
|
||||||
global args, showdebug, kcpp_instance, exitcounter
|
global args, showdebug, kcpp_instance, exitcounter
|
||||||
args = launch_args #note: these are NOT shared with the child processes!
|
args = launch_args #note: these are NOT shared with the child processes!
|
||||||
|
|
||||||
|
@ -4954,15 +4933,13 @@ def main(launch_args,start_server=True):
|
||||||
exit_with_error(1, "Error: Using --quantkv requires --flashattention")
|
exit_with_error(1, "Error: Using --quantkv requires --flashattention")
|
||||||
|
|
||||||
args = convert_outdated_args(args)
|
args = convert_outdated_args(args)
|
||||||
|
if not ((args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1)):
|
||||||
|
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}")
|
||||||
|
if args.debugmode != 1:
|
||||||
|
showdebug = False #not shared with child process!
|
||||||
|
|
||||||
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
|
|
||||||
suppress_stdout()
|
|
||||||
|
|
||||||
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
|
|
||||||
|
|
||||||
#perform some basic cleanup of old temporary directories
|
|
||||||
try:
|
try:
|
||||||
delete_old_pyinstaller()
|
delete_old_pyinstaller() #perform some basic cleanup of old temporary directories
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
|
print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
|
||||||
|
|
||||||
|
@ -4974,10 +4951,7 @@ def main(launch_args,start_server=True):
|
||||||
analyze_gguf_model_wrapper(args.analyze)
|
analyze_gguf_model_wrapper(args.analyze)
|
||||||
return
|
return
|
||||||
|
|
||||||
if args.debugmode != 1:
|
if args.config and len(args.config)==1: #handle initial config loading for launch
|
||||||
showdebug = False #not shared with child process!
|
|
||||||
|
|
||||||
if args.config and len(args.config)==1: #handle config loading for launch
|
|
||||||
cfgname = args.config[0]
|
cfgname = args.config[0]
|
||||||
if isinstance(cfgname, str):
|
if isinstance(cfgname, str):
|
||||||
dlfile = download_model_from_url(cfgname,[".kcpps",".kcppt"])
|
dlfile = download_model_from_url(cfgname,[".kcpps",".kcppt"])
|
||||||
|
@ -4999,35 +4973,15 @@ def main(launch_args,start_server=True):
|
||||||
args.model_param = dlfile
|
args.model_param = dlfile
|
||||||
load_config_cli(args.model_param)
|
load_config_cli(args.model_param)
|
||||||
|
|
||||||
# lastly, show the GUI launcher if a model was not provided
|
|
||||||
if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
|
|
||||||
#give them a chance to pick a file
|
|
||||||
print("For command line arguments, please refer to --help")
|
|
||||||
print("***")
|
|
||||||
try:
|
|
||||||
show_gui()
|
|
||||||
except Exception as ex:
|
|
||||||
exitcounter = 999
|
|
||||||
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
|
|
||||||
show_gui_msgbox("Warning, GUI failed to start",ermsg)
|
|
||||||
if args.skiplauncher:
|
|
||||||
print("Note: In order to use --skiplauncher, you need to specify a model with --model")
|
|
||||||
time.sleep(3)
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
if args.model_param and (args.benchmark or args.prompt):
|
|
||||||
start_server = False
|
|
||||||
|
|
||||||
# manager command queue
|
# manager command queue
|
||||||
multiprocessing.freeze_support()
|
|
||||||
with multiprocessing.Manager() as mp_manager:
|
with multiprocessing.Manager() as mp_manager:
|
||||||
global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":""})
|
global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False})
|
||||||
|
|
||||||
if start_server and args.remotetunnel:
|
if args.remotetunnel and not args.prompt and not args.benchmark:
|
||||||
setuptunnel(global_memory, True if args.sdmodel else False)
|
setuptunnel(global_memory, True if args.sdmodel else False)
|
||||||
|
|
||||||
# invoke the main koboldcpp process
|
# invoke the main koboldcpp process
|
||||||
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory})
|
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory})
|
||||||
kcpp_instance.daemon = True
|
kcpp_instance.daemon = True
|
||||||
kcpp_instance.start()
|
kcpp_instance.start()
|
||||||
|
|
||||||
|
@ -5051,7 +5005,7 @@ def main(launch_args,start_server=True):
|
||||||
kcpp_instance = None
|
kcpp_instance = None
|
||||||
print("Restarting KoboldCpp...")
|
print("Restarting KoboldCpp...")
|
||||||
reload_new_config(targetfilepath)
|
reload_new_config(targetfilepath)
|
||||||
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory})
|
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory})
|
||||||
kcpp_instance.daemon = True
|
kcpp_instance.daemon = True
|
||||||
kcpp_instance.start()
|
kcpp_instance.start()
|
||||||
global_memory["restart_target"] = ""
|
global_memory["restart_target"] = ""
|
||||||
|
@ -5060,15 +5014,43 @@ def main(launch_args,start_server=True):
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
except (KeyboardInterrupt,SystemExit):
|
except (KeyboardInterrupt,SystemExit):
|
||||||
break
|
break
|
||||||
|
if global_memory["input_to_exit"]:
|
||||||
|
print("===")
|
||||||
|
print("Press ENTER key to exit.", flush=True)
|
||||||
|
input()
|
||||||
|
|
||||||
def kcpp_main_process(launch_args, start_server=True, g_memory=None):
|
def kcpp_main_process(launch_args, g_memory=None):
|
||||||
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory
|
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory
|
||||||
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
|
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
|
||||||
|
|
||||||
|
start_server = True
|
||||||
|
|
||||||
args = launch_args
|
args = launch_args
|
||||||
global_memory = g_memory
|
global_memory = g_memory
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
|
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
|
||||||
|
suppress_stdout()
|
||||||
|
|
||||||
|
# show the GUI launcher if a model was not provided
|
||||||
|
if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
|
||||||
|
#give them a chance to pick a file
|
||||||
|
print("For command line arguments, please refer to --help")
|
||||||
|
print("***")
|
||||||
|
try:
|
||||||
|
show_gui()
|
||||||
|
except Exception as ex:
|
||||||
|
exitcounter = 999
|
||||||
|
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
|
||||||
|
show_gui_msgbox("Warning, GUI failed to start",ermsg)
|
||||||
|
if args.skiplauncher:
|
||||||
|
print("Note: In order to use --skiplauncher, you need to specify a model with --model")
|
||||||
|
time.sleep(3)
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
|
if args.model_param and (args.benchmark or args.prompt):
|
||||||
|
start_server = False
|
||||||
|
|
||||||
#try to read story if provided
|
#try to read story if provided
|
||||||
if args.preloadstory:
|
if args.preloadstory:
|
||||||
global preloaded_story
|
global preloaded_story
|
||||||
|
@ -5644,11 +5626,9 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None):
|
||||||
print(f"Error writing benchmark to file: {e}")
|
print(f"Error writing benchmark to file: {e}")
|
||||||
global using_gui_launcher
|
global using_gui_launcher
|
||||||
if using_gui_launcher and not save_to_file:
|
if using_gui_launcher and not save_to_file:
|
||||||
print("===")
|
global_memory["input_to_exit"] = True
|
||||||
print("Press ENTER key to exit.", flush=True)
|
time.sleep(1)
|
||||||
input()
|
|
||||||
|
|
||||||
check_deprecation_warning()
|
|
||||||
if start_server:
|
if start_server:
|
||||||
if args.remotetunnel:
|
if args.remotetunnel:
|
||||||
if remote_url:
|
if remote_url:
|
||||||
|
@ -5663,6 +5643,8 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None):
|
||||||
print("Server was not started, main function complete. Idling.", flush=True)
|
print("Server was not started, main function complete. Idling.", flush=True)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
import multiprocessing
|
||||||
|
multiprocessing.freeze_support()
|
||||||
|
|
||||||
def check_range(value_type, min_value, max_value):
|
def check_range(value_type, min_value, max_value):
|
||||||
def range_checker(arg: str):
|
def range_checker(arg: str):
|
||||||
|
@ -5790,4 +5772,4 @@ if __name__ == '__main__':
|
||||||
compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
|
compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
|
||||||
compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true')
|
compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true')
|
||||||
|
|
||||||
main(parser.parse_args(),start_server=True)
|
main(parser.parse_args())
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue