diff --git a/CMakeLists.txt b/CMakeLists.txt index 178d693a6..bc4918c0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,7 +171,7 @@ if (LLAMA_HIPBLAS) message(STATUS "HIP and hipBLAS found") file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu") list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu") - file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu") + file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) diff --git a/koboldcpp.py b/koboldcpp.py index 017771354..3e52c0eb7 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -47,6 +47,13 @@ logit_bias_max = 512 dry_seq_break_max = 128 # global vars +KcppVersion = "1.83" +showdebug = True +guimode = False +kcpp_instance = None #global running instance +global_memory = None +using_gui_launcher = False + handle = None friendlymodelname = "inactive" friendlysdmodelname = "inactive" @@ -62,9 +69,6 @@ maxhordelen = 400 modelbusy = threading.Lock() requestsinqueue = 0 defaultport = 5001 -KcppVersion = "1.83" -showdebug = True -guimode = False showsamplerwarning = True showmaxctxwarning = True showusedmemwarning = True @@ -100,10 +104,7 @@ start_time = time.time() last_req_time = time.time() last_non_horde_req_time = time.time() currfinishreason = "null" -using_gui_launcher = False -using_outdated_flags = False -kcpp_instance = None #global running instance -global_memory = None + saved_stdout = None saved_stderr = None @@ -4624,11 +4625,7 @@ def convert_outdated_args(args): dict = args if isinstance(args, argparse.Namespace): dict = vars(args) - - global using_outdated_flags - using_outdated_flags = False if "sdconfig" in dict and dict["sdconfig"] and len(dict["sdconfig"])>0: - using_outdated_flags = True dict["sdmodel"] = dict["sdconfig"][0] if dict["sdconfig"] and len(dict["sdconfig"]) > 1: dict["sdclamped"] = 512 @@ -4637,7 +4634,6 @@ def convert_outdated_args(args): if dict["sdconfig"] and len(dict["sdconfig"]) > 3: dict["sdquant"] = (True if dict["sdconfig"][3]=="quant" else False) if "hordeconfig" in dict and dict["hordeconfig"] and dict["hordeconfig"][0]!="": - using_outdated_flags = True dict["hordemodelname"] = dict["hordeconfig"][0] if len(dict["hordeconfig"]) > 1: dict["hordegenlen"] = int(dict["hordeconfig"][1]) @@ -4648,33 +4644,12 @@ def convert_outdated_args(args): dict["hordeworkername"] = dict["hordeconfig"][4] if "noblas" in dict and dict["noblas"]: dict["usecpu"] = True - if "failsafe" in dict and dict["failsafe"]: #failsafe implies noavx2 dict["noavx2"] = True - if ("model_param" not in dict or not dict["model_param"]) and ("model" in dict and dict["model"]): dict["model_param"] = dict["model"] - - check_deprecation_warning() return args -def check_deprecation_warning(): - # slightly naggy warning to encourage people to switch to new flags - # if you want you can remove this at your own risk, - # but i am not going to troubleshoot or provide support for deprecated flags. - global using_outdated_flags - if using_outdated_flags: - print("\n=== !!! IMPORTANT WARNING !!! ===") - print("You are using one or more OUTDATED config files or launch flags!") - print("The flags --hordeconfig and --sdconfig have been DEPRECATED, and MAY be REMOVED in future!") - print("They will still work for now, but you SHOULD switch to the updated flags instead, to avoid future issues!") - print("New flags are: --hordemodelname --hordeworkername --hordekey --hordemaxctx --hordegenlen --sdmodel --sdthreads --sdquant --sdclamped") - print("For more information on these flags, please check --help") - print(">>> If you are using the GUI launcher, simply re-saving your config again will get rid of this warning.") - print("=== !!! IMPORTANT WARNING !!! ===\n") - - - def setuptunnel(global_memory, has_sd): # This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet # It should work out of the box on both linux and windows @@ -4829,8 +4804,12 @@ def reload_new_config(filename): #for changing config after launch config = json.load(f) args.istemplate = False for key, value in config.items(): #do not overwrite certain values - if key not in ["remotetunnel","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt"]: + if key not in ["remotetunnel","showgui","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt","config"]: setattr(args, key, value) + setattr(args,"showgui",False) + setattr(args,"benchmark",False) + setattr(args,"prompt","") + setattr(args,"config",None) def load_config_cli(filename): print("Loading .kcpps configuration file...") @@ -4941,7 +4920,7 @@ def analyze_gguf_model_wrapper(filename=""): dumpthread = threading.Thread(target=analyze_gguf_model, args=(args,filename)) dumpthread.start() -def main(launch_args,start_server=True): +def main(launch_args): global args, showdebug, kcpp_instance, exitcounter args = launch_args #note: these are NOT shared with the child processes! @@ -4954,15 +4933,13 @@ def main(launch_args,start_server=True): exit_with_error(1, "Error: Using --quantkv requires --flashattention") args = convert_outdated_args(args) + if not ((args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1)): + print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") + if args.debugmode != 1: + showdebug = False #not shared with child process! - if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1): - suppress_stdout() - - print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually - - #perform some basic cleanup of old temporary directories try: - delete_old_pyinstaller() + delete_old_pyinstaller() #perform some basic cleanup of old temporary directories except Exception as e: print(f"Error cleaning up orphaned pyinstaller dirs: {e}") @@ -4974,10 +4951,7 @@ def main(launch_args,start_server=True): analyze_gguf_model_wrapper(args.analyze) return - if args.debugmode != 1: - showdebug = False #not shared with child process! - - if args.config and len(args.config)==1: #handle config loading for launch + if args.config and len(args.config)==1: #handle initial config loading for launch cfgname = args.config[0] if isinstance(cfgname, str): dlfile = download_model_from_url(cfgname,[".kcpps",".kcppt"]) @@ -4999,35 +4973,15 @@ def main(launch_args,start_server=True): args.model_param = dlfile load_config_cli(args.model_param) - # lastly, show the GUI launcher if a model was not provided - if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel): - #give them a chance to pick a file - print("For command line arguments, please refer to --help") - print("***") - try: - show_gui() - except Exception as ex: - exitcounter = 999 - ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help" - show_gui_msgbox("Warning, GUI failed to start",ermsg) - if args.skiplauncher: - print("Note: In order to use --skiplauncher, you need to specify a model with --model") - time.sleep(3) - sys.exit(2) - - if args.model_param and (args.benchmark or args.prompt): - start_server = False - # manager command queue - multiprocessing.freeze_support() with multiprocessing.Manager() as mp_manager: - global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":""}) + global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False}) - if start_server and args.remotetunnel: + if args.remotetunnel and not args.prompt and not args.benchmark: setuptunnel(global_memory, True if args.sdmodel else False) # invoke the main koboldcpp process - kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory}) + kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory}) kcpp_instance.daemon = True kcpp_instance.start() @@ -5051,7 +5005,7 @@ def main(launch_args,start_server=True): kcpp_instance = None print("Restarting KoboldCpp...") reload_new_config(targetfilepath) - kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory}) + kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory}) kcpp_instance.daemon = True kcpp_instance.start() global_memory["restart_target"] = "" @@ -5060,15 +5014,43 @@ def main(launch_args,start_server=True): time.sleep(0.2) except (KeyboardInterrupt,SystemExit): break + if global_memory["input_to_exit"]: + print("===") + print("Press ENTER key to exit.", flush=True) + input() -def kcpp_main_process(launch_args, start_server=True, g_memory=None): +def kcpp_main_process(launch_args, g_memory=None): global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath + start_server = True + args = launch_args global_memory = g_memory start_time = time.time() + if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1): + suppress_stdout() + + # show the GUI launcher if a model was not provided + if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel): + #give them a chance to pick a file + print("For command line arguments, please refer to --help") + print("***") + try: + show_gui() + except Exception as ex: + exitcounter = 999 + ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help" + show_gui_msgbox("Warning, GUI failed to start",ermsg) + if args.skiplauncher: + print("Note: In order to use --skiplauncher, you need to specify a model with --model") + time.sleep(3) + sys.exit(2) + + if args.model_param and (args.benchmark or args.prompt): + start_server = False + #try to read story if provided if args.preloadstory: global preloaded_story @@ -5644,11 +5626,9 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None): print(f"Error writing benchmark to file: {e}") global using_gui_launcher if using_gui_launcher and not save_to_file: - print("===") - print("Press ENTER key to exit.", flush=True) - input() + global_memory["input_to_exit"] = True + time.sleep(1) - check_deprecation_warning() if start_server: if args.remotetunnel: if remote_url: @@ -5663,6 +5643,8 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None): print("Server was not started, main function complete. Idling.", flush=True) if __name__ == '__main__': + import multiprocessing + multiprocessing.freeze_support() def check_range(value_type, min_value, max_value): def range_checker(arg: str): @@ -5790,4 +5772,4 @@ if __name__ == '__main__': compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true') compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true') - main(parser.parse_args(),start_server=True) + main(parser.parse_args())