fixed some global reference

This commit is contained in:
Concedo 2025-02-07 14:44:47 +08:00
parent ff9b4041da
commit b246d83dca
2 changed files with 59 additions and 77 deletions

View file

@ -171,7 +171,7 @@ if (LLAMA_HIPBLAS)
message(STATUS "HIP and hipBLAS found")
file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu")
list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu")
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu")
list(APPEND GGML_SOURCES_ROCM ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
list(APPEND GGML_SOURCES_ROCM ${SRCS})

View file

@ -47,6 +47,13 @@ logit_bias_max = 512
dry_seq_break_max = 128
# global vars
KcppVersion = "1.83"
showdebug = True
guimode = False
kcpp_instance = None #global running instance
global_memory = None
using_gui_launcher = False
handle = None
friendlymodelname = "inactive"
friendlysdmodelname = "inactive"
@ -62,9 +69,6 @@ maxhordelen = 400
modelbusy = threading.Lock()
requestsinqueue = 0
defaultport = 5001
KcppVersion = "1.83"
showdebug = True
guimode = False
showsamplerwarning = True
showmaxctxwarning = True
showusedmemwarning = True
@ -100,10 +104,7 @@ start_time = time.time()
last_req_time = time.time()
last_non_horde_req_time = time.time()
currfinishreason = "null"
using_gui_launcher = False
using_outdated_flags = False
kcpp_instance = None #global running instance
global_memory = None
saved_stdout = None
saved_stderr = None
@ -4624,11 +4625,7 @@ def convert_outdated_args(args):
dict = args
if isinstance(args, argparse.Namespace):
dict = vars(args)
global using_outdated_flags
using_outdated_flags = False
if "sdconfig" in dict and dict["sdconfig"] and len(dict["sdconfig"])>0:
using_outdated_flags = True
dict["sdmodel"] = dict["sdconfig"][0]
if dict["sdconfig"] and len(dict["sdconfig"]) > 1:
dict["sdclamped"] = 512
@ -4637,7 +4634,6 @@ def convert_outdated_args(args):
if dict["sdconfig"] and len(dict["sdconfig"]) > 3:
dict["sdquant"] = (True if dict["sdconfig"][3]=="quant" else False)
if "hordeconfig" in dict and dict["hordeconfig"] and dict["hordeconfig"][0]!="":
using_outdated_flags = True
dict["hordemodelname"] = dict["hordeconfig"][0]
if len(dict["hordeconfig"]) > 1:
dict["hordegenlen"] = int(dict["hordeconfig"][1])
@ -4648,33 +4644,12 @@ def convert_outdated_args(args):
dict["hordeworkername"] = dict["hordeconfig"][4]
if "noblas" in dict and dict["noblas"]:
dict["usecpu"] = True
if "failsafe" in dict and dict["failsafe"]: #failsafe implies noavx2
dict["noavx2"] = True
if ("model_param" not in dict or not dict["model_param"]) and ("model" in dict and dict["model"]):
dict["model_param"] = dict["model"]
check_deprecation_warning()
return args
def check_deprecation_warning():
# slightly naggy warning to encourage people to switch to new flags
# if you want you can remove this at your own risk,
# but i am not going to troubleshoot or provide support for deprecated flags.
global using_outdated_flags
if using_outdated_flags:
print("\n=== !!! IMPORTANT WARNING !!! ===")
print("You are using one or more OUTDATED config files or launch flags!")
print("The flags --hordeconfig and --sdconfig have been DEPRECATED, and MAY be REMOVED in future!")
print("They will still work for now, but you SHOULD switch to the updated flags instead, to avoid future issues!")
print("New flags are: --hordemodelname --hordeworkername --hordekey --hordemaxctx --hordegenlen --sdmodel --sdthreads --sdquant --sdclamped")
print("For more information on these flags, please check --help")
print(">>> If you are using the GUI launcher, simply re-saving your config again will get rid of this warning.")
print("=== !!! IMPORTANT WARNING !!! ===\n")
def setuptunnel(global_memory, has_sd):
# This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
# It should work out of the box on both linux and windows
@ -4829,8 +4804,12 @@ def reload_new_config(filename): #for changing config after launch
config = json.load(f)
args.istemplate = False
for key, value in config.items(): #do not overwrite certain values
if key not in ["remotetunnel","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt"]:
if key not in ["remotetunnel","showgui","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt","config"]:
setattr(args, key, value)
setattr(args,"showgui",False)
setattr(args,"benchmark",False)
setattr(args,"prompt","")
setattr(args,"config",None)
def load_config_cli(filename):
print("Loading .kcpps configuration file...")
@ -4941,7 +4920,7 @@ def analyze_gguf_model_wrapper(filename=""):
dumpthread = threading.Thread(target=analyze_gguf_model, args=(args,filename))
dumpthread.start()
def main(launch_args,start_server=True):
def main(launch_args):
global args, showdebug, kcpp_instance, exitcounter
args = launch_args #note: these are NOT shared with the child processes!
@ -4954,15 +4933,13 @@ def main(launch_args,start_server=True):
exit_with_error(1, "Error: Using --quantkv requires --flashattention")
args = convert_outdated_args(args)
if not ((args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1)):
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}")
if args.debugmode != 1:
showdebug = False #not shared with child process!
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
suppress_stdout()
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
#perform some basic cleanup of old temporary directories
try:
delete_old_pyinstaller()
delete_old_pyinstaller() #perform some basic cleanup of old temporary directories
except Exception as e:
print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
@ -4974,10 +4951,7 @@ def main(launch_args,start_server=True):
analyze_gguf_model_wrapper(args.analyze)
return
if args.debugmode != 1:
showdebug = False #not shared with child process!
if args.config and len(args.config)==1: #handle config loading for launch
if args.config and len(args.config)==1: #handle initial config loading for launch
cfgname = args.config[0]
if isinstance(cfgname, str):
dlfile = download_model_from_url(cfgname,[".kcpps",".kcppt"])
@ -4999,35 +4973,15 @@ def main(launch_args,start_server=True):
args.model_param = dlfile
load_config_cli(args.model_param)
# lastly, show the GUI launcher if a model was not provided
if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
#give them a chance to pick a file
print("For command line arguments, please refer to --help")
print("***")
try:
show_gui()
except Exception as ex:
exitcounter = 999
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
show_gui_msgbox("Warning, GUI failed to start",ermsg)
if args.skiplauncher:
print("Note: In order to use --skiplauncher, you need to specify a model with --model")
time.sleep(3)
sys.exit(2)
if args.model_param and (args.benchmark or args.prompt):
start_server = False
# manager command queue
multiprocessing.freeze_support()
with multiprocessing.Manager() as mp_manager:
global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":""})
global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False})
if start_server and args.remotetunnel:
if args.remotetunnel and not args.prompt and not args.benchmark:
setuptunnel(global_memory, True if args.sdmodel else False)
# invoke the main koboldcpp process
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory})
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory})
kcpp_instance.daemon = True
kcpp_instance.start()
@ -5051,7 +5005,7 @@ def main(launch_args,start_server=True):
kcpp_instance = None
print("Restarting KoboldCpp...")
reload_new_config(targetfilepath)
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory})
kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory})
kcpp_instance.daemon = True
kcpp_instance.start()
global_memory["restart_target"] = ""
@ -5060,15 +5014,43 @@ def main(launch_args,start_server=True):
time.sleep(0.2)
except (KeyboardInterrupt,SystemExit):
break
if global_memory["input_to_exit"]:
print("===")
print("Press ENTER key to exit.", flush=True)
input()
def kcpp_main_process(launch_args, start_server=True, g_memory=None):
def kcpp_main_process(launch_args, g_memory=None):
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
start_server = True
args = launch_args
global_memory = g_memory
start_time = time.time()
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
suppress_stdout()
# show the GUI launcher if a model was not provided
if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
#give them a chance to pick a file
print("For command line arguments, please refer to --help")
print("***")
try:
show_gui()
except Exception as ex:
exitcounter = 999
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
show_gui_msgbox("Warning, GUI failed to start",ermsg)
if args.skiplauncher:
print("Note: In order to use --skiplauncher, you need to specify a model with --model")
time.sleep(3)
sys.exit(2)
if args.model_param and (args.benchmark or args.prompt):
start_server = False
#try to read story if provided
if args.preloadstory:
global preloaded_story
@ -5644,11 +5626,9 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None):
print(f"Error writing benchmark to file: {e}")
global using_gui_launcher
if using_gui_launcher and not save_to_file:
print("===")
print("Press ENTER key to exit.", flush=True)
input()
global_memory["input_to_exit"] = True
time.sleep(1)
check_deprecation_warning()
if start_server:
if args.remotetunnel:
if remote_url:
@ -5663,6 +5643,8 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None):
print("Server was not started, main function complete. Idling.", flush=True)
if __name__ == '__main__':
import multiprocessing
multiprocessing.freeze_support()
def check_range(value_type, min_value, max_value):
def range_checker(arg: str):
@ -5790,4 +5772,4 @@ if __name__ == '__main__':
compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true')
main(parser.parse_args(),start_server=True)
main(parser.parse_args())