fixed some global reference

This commit is contained in:
Concedo 2025-02-07 14:44:47 +08:00
parent ff9b4041da
commit b246d83dca
2 changed files with 59 additions and 77 deletions

View file

@ -171,7 +171,7 @@ if (LLAMA_HIPBLAS)
message(STATUS "HIP and hipBLAS found") message(STATUS "HIP and hipBLAS found")
file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu") file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu")
list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu") list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu")
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu") file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu")
list(APPEND GGML_SOURCES_ROCM ${SRCS}) list(APPEND GGML_SOURCES_ROCM ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu") file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
list(APPEND GGML_SOURCES_ROCM ${SRCS}) list(APPEND GGML_SOURCES_ROCM ${SRCS})

View file

@ -47,6 +47,13 @@ logit_bias_max = 512
dry_seq_break_max = 128 dry_seq_break_max = 128
# global vars # global vars
KcppVersion = "1.83"
showdebug = True
guimode = False
kcpp_instance = None #global running instance
global_memory = None
using_gui_launcher = False
handle = None handle = None
friendlymodelname = "inactive" friendlymodelname = "inactive"
friendlysdmodelname = "inactive" friendlysdmodelname = "inactive"
@ -62,9 +69,6 @@ maxhordelen = 400
modelbusy = threading.Lock() modelbusy = threading.Lock()
requestsinqueue = 0 requestsinqueue = 0
defaultport = 5001 defaultport = 5001
KcppVersion = "1.83"
showdebug = True
guimode = False
showsamplerwarning = True showsamplerwarning = True
showmaxctxwarning = True showmaxctxwarning = True
showusedmemwarning = True showusedmemwarning = True
@ -100,10 +104,7 @@ start_time = time.time()
last_req_time = time.time() last_req_time = time.time()
last_non_horde_req_time = time.time() last_non_horde_req_time = time.time()
currfinishreason = "null" currfinishreason = "null"
using_gui_launcher = False
using_outdated_flags = False
kcpp_instance = None #global running instance
global_memory = None
saved_stdout = None saved_stdout = None
saved_stderr = None saved_stderr = None
@ -4624,11 +4625,7 @@ def convert_outdated_args(args):
dict = args dict = args
if isinstance(args, argparse.Namespace): if isinstance(args, argparse.Namespace):
dict = vars(args) dict = vars(args)
global using_outdated_flags
using_outdated_flags = False
if "sdconfig" in dict and dict["sdconfig"] and len(dict["sdconfig"])>0: if "sdconfig" in dict and dict["sdconfig"] and len(dict["sdconfig"])>0:
using_outdated_flags = True
dict["sdmodel"] = dict["sdconfig"][0] dict["sdmodel"] = dict["sdconfig"][0]
if dict["sdconfig"] and len(dict["sdconfig"]) > 1: if dict["sdconfig"] and len(dict["sdconfig"]) > 1:
dict["sdclamped"] = 512 dict["sdclamped"] = 512
@ -4637,7 +4634,6 @@ def convert_outdated_args(args):
if dict["sdconfig"] and len(dict["sdconfig"]) > 3: if dict["sdconfig"] and len(dict["sdconfig"]) > 3:
dict["sdquant"] = (True if dict["sdconfig"][3]=="quant" else False) dict["sdquant"] = (True if dict["sdconfig"][3]=="quant" else False)
if "hordeconfig" in dict and dict["hordeconfig"] and dict["hordeconfig"][0]!="": if "hordeconfig" in dict and dict["hordeconfig"] and dict["hordeconfig"][0]!="":
using_outdated_flags = True
dict["hordemodelname"] = dict["hordeconfig"][0] dict["hordemodelname"] = dict["hordeconfig"][0]
if len(dict["hordeconfig"]) > 1: if len(dict["hordeconfig"]) > 1:
dict["hordegenlen"] = int(dict["hordeconfig"][1]) dict["hordegenlen"] = int(dict["hordeconfig"][1])
@ -4648,33 +4644,12 @@ def convert_outdated_args(args):
dict["hordeworkername"] = dict["hordeconfig"][4] dict["hordeworkername"] = dict["hordeconfig"][4]
if "noblas" in dict and dict["noblas"]: if "noblas" in dict and dict["noblas"]:
dict["usecpu"] = True dict["usecpu"] = True
if "failsafe" in dict and dict["failsafe"]: #failsafe implies noavx2 if "failsafe" in dict and dict["failsafe"]: #failsafe implies noavx2
dict["noavx2"] = True dict["noavx2"] = True
if ("model_param" not in dict or not dict["model_param"]) and ("model" in dict and dict["model"]): if ("model_param" not in dict or not dict["model_param"]) and ("model" in dict and dict["model"]):
dict["model_param"] = dict["model"] dict["model_param"] = dict["model"]
check_deprecation_warning()
return args return args
def check_deprecation_warning():
# slightly naggy warning to encourage people to switch to new flags
# if you want you can remove this at your own risk,
# but i am not going to troubleshoot or provide support for deprecated flags.
global using_outdated_flags
if using_outdated_flags:
print("\n=== !!! IMPORTANT WARNING !!! ===")
print("You are using one or more OUTDATED config files or launch flags!")
print("The flags --hordeconfig and --sdconfig have been DEPRECATED, and MAY be REMOVED in future!")
print("They will still work for now, but you SHOULD switch to the updated flags instead, to avoid future issues!")
print("New flags are: --hordemodelname --hordeworkername --hordekey --hordemaxctx --hordegenlen --sdmodel --sdthreads --sdquant --sdclamped")
print("For more information on these flags, please check --help")
print(">>> If you are using the GUI launcher, simply re-saving your config again will get rid of this warning.")
print("=== !!! IMPORTANT WARNING !!! ===\n")
def setuptunnel(global_memory, has_sd): def setuptunnel(global_memory, has_sd):
# This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet # This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
# It should work out of the box on both linux and windows # It should work out of the box on both linux and windows
@ -4829,8 +4804,12 @@ def reload_new_config(filename): #for changing config after launch
config = json.load(f) config = json.load(f)
args.istemplate = False args.istemplate = False
for key, value in config.items(): #do not overwrite certain values for key, value in config.items(): #do not overwrite certain values
if key not in ["remotetunnel","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt"]: if key not in ["remotetunnel","showgui","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt","config"]:
setattr(args, key, value) setattr(args, key, value)
setattr(args,"showgui",False)
setattr(args,"benchmark",False)
setattr(args,"prompt","")
setattr(args,"config",None)
def load_config_cli(filename): def load_config_cli(filename):
print("Loading .kcpps configuration file...") print("Loading .kcpps configuration file...")
@ -4941,7 +4920,7 @@ def analyze_gguf_model_wrapper(filename=""):
dumpthread = threading.Thread(target=analyze_gguf_model, args=(args,filename)) dumpthread = threading.Thread(target=analyze_gguf_model, args=(args,filename))
dumpthread.start() dumpthread.start()
def main(launch_args,start_server=True): def main(launch_args):
global args, showdebug, kcpp_instance, exitcounter global args, showdebug, kcpp_instance, exitcounter
args = launch_args #note: these are NOT shared with the child processes! args = launch_args #note: these are NOT shared with the child processes!
@ -4954,15 +4933,13 @@ def main(launch_args,start_server=True):
exit_with_error(1, "Error: Using --quantkv requires --flashattention") exit_with_error(1, "Error: Using --quantkv requires --flashattention")
args = convert_outdated_args(args) args = convert_outdated_args(args)
if not ((args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1)):
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}")
if args.debugmode != 1:
showdebug = False #not shared with child process!
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
suppress_stdout()
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
#perform some basic cleanup of old temporary directories
try: try:
delete_old_pyinstaller() delete_old_pyinstaller() #perform some basic cleanup of old temporary directories
except Exception as e: except Exception as e:
print(f"Error cleaning up orphaned pyinstaller dirs: {e}") print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
@ -4974,10 +4951,7 @@ def main(launch_args,start_server=True):
analyze_gguf_model_wrapper(args.analyze) analyze_gguf_model_wrapper(args.analyze)
return return
if args.debugmode != 1: if args.config and len(args.config)==1: #handle initial config loading for launch
showdebug = False #not shared with child process!
if args.config and len(args.config)==1: #handle config loading for launch
cfgname = args.config[0] cfgname = args.config[0]
if isinstance(cfgname, str): if isinstance(cfgname, str):
dlfile = download_model_from_url(cfgname,[".kcpps",".kcppt"]) dlfile = download_model_from_url(cfgname,[".kcpps",".kcppt"])
@ -4999,35 +4973,15 @@ def main(launch_args,start_server=True):
args.model_param = dlfile args.model_param = dlfile
load_config_cli(args.model_param) load_config_cli(args.model_param)
# lastly, show the GUI launcher if a model was not provided
if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
#give them a chance to pick a file
print("For command line arguments, please refer to --help")
print("***")
try:
show_gui()
except Exception as ex:
exitcounter = 999
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
show_gui_msgbox("Warning, GUI failed to start",ermsg)
if args.skiplauncher:
print("Note: In order to use --skiplauncher, you need to specify a model with --model")
time.sleep(3)
sys.exit(2)
if args.model_param and (args.benchmark or args.prompt):
start_server = False
# manager command queue # manager command queue
multiprocessing.freeze_support()
with multiprocessing.Manager() as mp_manager: with multiprocessing.Manager() as mp_manager:
global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":""}) global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False})
if start_server and args.remotetunnel: if args.remotetunnel and not args.prompt and not args.benchmark:
setuptunnel(global_memory, True if args.sdmodel else False) setuptunnel(global_memory, True if args.sdmodel else False)
# invoke the main koboldcpp process # invoke the main koboldcpp process
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory}) kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory})
kcpp_instance.daemon = True kcpp_instance.daemon = True
kcpp_instance.start() kcpp_instance.start()
@ -5051,7 +5005,7 @@ def main(launch_args,start_server=True):
kcpp_instance = None kcpp_instance = None
print("Restarting KoboldCpp...") print("Restarting KoboldCpp...")
reload_new_config(targetfilepath) reload_new_config(targetfilepath)
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory}) kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory})
kcpp_instance.daemon = True kcpp_instance.daemon = True
kcpp_instance.start() kcpp_instance.start()
global_memory["restart_target"] = "" global_memory["restart_target"] = ""
@ -5060,15 +5014,43 @@ def main(launch_args,start_server=True):
time.sleep(0.2) time.sleep(0.2)
except (KeyboardInterrupt,SystemExit): except (KeyboardInterrupt,SystemExit):
break break
if global_memory["input_to_exit"]:
print("===")
print("Press ENTER key to exit.", flush=True)
input()
def kcpp_main_process(launch_args, start_server=True, g_memory=None): def kcpp_main_process(launch_args, g_memory=None):
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
start_server = True
args = launch_args args = launch_args
global_memory = g_memory global_memory = g_memory
start_time = time.time() start_time = time.time()
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
suppress_stdout()
# show the GUI launcher if a model was not provided
if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
#give them a chance to pick a file
print("For command line arguments, please refer to --help")
print("***")
try:
show_gui()
except Exception as ex:
exitcounter = 999
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
show_gui_msgbox("Warning, GUI failed to start",ermsg)
if args.skiplauncher:
print("Note: In order to use --skiplauncher, you need to specify a model with --model")
time.sleep(3)
sys.exit(2)
if args.model_param and (args.benchmark or args.prompt):
start_server = False
#try to read story if provided #try to read story if provided
if args.preloadstory: if args.preloadstory:
global preloaded_story global preloaded_story
@ -5644,11 +5626,9 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None):
print(f"Error writing benchmark to file: {e}") print(f"Error writing benchmark to file: {e}")
global using_gui_launcher global using_gui_launcher
if using_gui_launcher and not save_to_file: if using_gui_launcher and not save_to_file:
print("===") global_memory["input_to_exit"] = True
print("Press ENTER key to exit.", flush=True) time.sleep(1)
input()
check_deprecation_warning()
if start_server: if start_server:
if args.remotetunnel: if args.remotetunnel:
if remote_url: if remote_url:
@ -5663,6 +5643,8 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None):
print("Server was not started, main function complete. Idling.", flush=True) print("Server was not started, main function complete. Idling.", flush=True)
if __name__ == '__main__': if __name__ == '__main__':
import multiprocessing
multiprocessing.freeze_support()
def check_range(value_type, min_value, max_value): def check_range(value_type, min_value, max_value):
def range_checker(arg: str): def range_checker(arg: str):
@ -5790,4 +5772,4 @@ if __name__ == '__main__':
compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true') compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true') compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true')
main(parser.parse_args(),start_server=True) main(parser.parse_args())