Fixed some global references

2025-09-11 01:24:36 +00:00 · 2025-02-07 14:44:47 +08:00 · 2025-02-07 14:44:47 +08:00 · b246d83dca
commit b246d83dca
parent ff9b4041da
2 changed files with 59 additions and 77 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -171,7 +171,7 @@ if (LLAMA_HIPBLAS)
        message(STATUS "HIP and hipBLAS found")
        file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu")
        list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu")
-        file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
+        file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu")
        list(APPEND GGML_SOURCES_ROCM ${SRCS})
        file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
        list(APPEND GGML_SOURCES_ROCM ${SRCS})
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -47,6 +47,13 @@ logit_bias_max = 512
 dry_seq_break_max = 128
 # global vars
 KcppVersion = "1.83"
 showdebug = True
 guimode = False
 kcpp_instance = None #global running instance
 global_memory = None
 using_gui_launcher = False
 handle = None
 friendlymodelname = "inactive"
 friendlysdmodelname = "inactive"
@ -62,9 +69,6 @@ maxhordelen = 400
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
 KcppVersion = "1.83"
 showdebug = True
 guimode = False
 showsamplerwarning = True
 showmaxctxwarning = True
 showusedmemwarning = True
@ -100,10 +104,7 @@ start_time = time.time()
 last_req_time = time.time()
 last_non_horde_req_time = time.time()
 currfinishreason = "null"
-using_gui_launcher = False
+
 using_outdated_flags = False
 kcpp_instance = None #global running instance
 global_memory = None
 saved_stdout = None
 saved_stderr = None
@ -4624,11 +4625,7 @@ def convert_outdated_args(args):
    dict = args
    if isinstance(args, argparse.Namespace):
        dict = vars(args)
    global using_outdated_flags
    using_outdated_flags = False
    if "sdconfig" in dict and dict["sdconfig"] and len(dict["sdconfig"])>0:
        using_outdated_flags = True
        dict["sdmodel"] = dict["sdconfig"][0]
        if dict["sdconfig"] and len(dict["sdconfig"]) > 1:
            dict["sdclamped"] = 512
@ -4637,7 +4634,6 @@ def convert_outdated_args(args):
        if dict["sdconfig"] and len(dict["sdconfig"]) > 3:
            dict["sdquant"] = (True if dict["sdconfig"][3]=="quant" else False)
    if "hordeconfig" in dict and dict["hordeconfig"] and dict["hordeconfig"][0]!="":
        using_outdated_flags = True
        dict["hordemodelname"] = dict["hordeconfig"][0]
        if len(dict["hordeconfig"]) > 1:
            dict["hordegenlen"] = int(dict["hordeconfig"][1])
@ -4648,33 +4644,12 @@ def convert_outdated_args(args):
            dict["hordeworkername"] = dict["hordeconfig"][4]
    if "noblas" in dict and dict["noblas"]:
        dict["usecpu"] = True
    if "failsafe" in dict and dict["failsafe"]: #failsafe implies noavx2
        dict["noavx2"] = True
    if ("model_param" not in dict or not dict["model_param"]) and ("model" in dict and dict["model"]):
        dict["model_param"] = dict["model"]
    check_deprecation_warning()
    return args
 def check_deprecation_warning():
    """Print a warning banner when deprecated launch flags or configs are in use.

    Reads the module-level ``using_outdated_flags`` flag and returns
    immediately, printing nothing, when it is falsy. Otherwise writes a
    multi-line notice to stdout that names the deprecated flags
    (--hordeconfig, --sdconfig) and lists their replacements.
    """
    # Intentionally a little naggy, to nudge people onto the new flags.
    # You may remove this at your own risk, but deprecated flags will not
    # be troubleshot or supported.
    global using_outdated_flags
    if not using_outdated_flags:
        return
    notice_lines = (
        "\n=== !!! IMPORTANT WARNING !!! ===",
        "You are using one or more OUTDATED config files or launch flags!",
        "The flags --hordeconfig and --sdconfig have been DEPRECATED, and MAY be REMOVED in future!",
        "They will still work for now, but you SHOULD switch to the updated flags instead, to avoid future issues!",
        "New flags are: --hordemodelname --hordeworkername --hordekey --hordemaxctx --hordegenlen --sdmodel --sdthreads --sdquant --sdclamped",
        "For more information on these flags, please check --help",
        ">>> If you are using the GUI launcher, simply re-saving your config again will get rid of this warning.",
        "=== !!! IMPORTANT WARNING !!! ===\n",
    )
    # One print call per line, matching the original output exactly.
    for notice in notice_lines:
        print(notice)
 def setuptunnel(global_memory, has_sd):
    # This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
    # It should work out of the box on both linux and windows
@ -4829,8 +4804,12 @@ def reload_new_config(filename): #for changing config after launch
        config = json.load(f)
        args.istemplate = False
        for key, value in config.items(): #do not overwrite certain values
-            if key not in ["remotetunnel","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt"]:
+            if key not in ["remotetunnel","showgui","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt","config"]:
                setattr(args, key, value)
        setattr(args,"showgui",False)
        setattr(args,"benchmark",False)
        setattr(args,"prompt","")
        setattr(args,"config",None)
 def load_config_cli(filename):
    print("Loading .kcpps configuration file...")
@ -4941,7 +4920,7 @@ def analyze_gguf_model_wrapper(filename=""):
    dumpthread = threading.Thread(target=analyze_gguf_model, args=(args,filename))
    dumpthread.start()
-def main(launch_args,start_server=True):
+def main(launch_args):
    global args, showdebug, kcpp_instance, exitcounter
    args = launch_args #note: these are NOT shared with the child processes!
@ -4954,15 +4933,13 @@ def main(launch_args,start_server=True):
        exit_with_error(1, "Error: Using --quantkv requires --flashattention")
    args = convert_outdated_args(args)
    if not ((args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1)):
        print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}")
    if args.debugmode != 1:
        showdebug = False #not shared with child process!
    if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
        suppress_stdout()
    print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
    #perform some basic cleanup of old temporary directories
    try:
-        delete_old_pyinstaller()
+        delete_old_pyinstaller()  #perform some basic cleanup of old temporary directories
    except Exception as e:
        print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
@ -4974,10 +4951,7 @@ def main(launch_args,start_server=True):
        analyze_gguf_model_wrapper(args.analyze)
        return
-    if args.debugmode != 1:
+    if args.config and len(args.config)==1: #handle initial config loading for launch
        showdebug = False #not shared with child process!
    if args.config and len(args.config)==1: #handle config loading for launch
        cfgname = args.config[0]
        if isinstance(cfgname, str):
            dlfile = download_model_from_url(cfgname,[".kcpps",".kcppt"])
@ -4999,35 +4973,15 @@ def main(launch_args,start_server=True):
            args.model_param = dlfile
        load_config_cli(args.model_param)
    # lastly, show the GUI launcher if a model was not provided
    if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
        #give them a chance to pick a file
        print("For command line arguments, please refer to --help")
        print("***")
        try:
            show_gui()
        except Exception as ex:
            exitcounter = 999
            ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
            show_gui_msgbox("Warning, GUI failed to start",ermsg)
            if args.skiplauncher:
                print("Note: In order to use --skiplauncher, you need to specify a model with --model")
            time.sleep(3)
            sys.exit(2)
    if args.model_param and (args.benchmark or args.prompt):
        start_server = False
    # manager command queue
    multiprocessing.freeze_support()
    with multiprocessing.Manager() as mp_manager:
-        global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":""})
+        global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False})
-        if start_server and args.remotetunnel:
+        if args.remotetunnel and not args.prompt and not args.benchmark:
            setuptunnel(global_memory, True if args.sdmodel else False)
        # invoke the main koboldcpp process
-        kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory})
+        kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory})
        kcpp_instance.daemon = True
        kcpp_instance.start()
@ -5051,7 +5005,7 @@ def main(launch_args,start_server=True):
                            kcpp_instance = None
                            print("Restarting KoboldCpp...")
                            reload_new_config(targetfilepath)
-                            kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "start_server": start_server, "g_memory": global_memory})
+                            kcpp_instance = multiprocessing.Process(target=kcpp_main_process,kwargs={"launch_args": args, "g_memory": global_memory})
                            kcpp_instance.daemon = True
                            kcpp_instance.start()
                            global_memory["restart_target"] = ""
@ -5060,15 +5014,43 @@ def main(launch_args,start_server=True):
                    time.sleep(0.2)
            except (KeyboardInterrupt,SystemExit):
                break
        if global_memory["input_to_exit"]:
            print("===")
            print("Press ENTER key to exit.", flush=True)
            input()
-def kcpp_main_process(launch_args, start_server=True, g_memory=None):
+def kcpp_main_process(launch_args, g_memory=None):
    global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, start_time, exitcounter, global_memory
    global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
    start_server = True
    args = launch_args
    global_memory = g_memory
    start_time = time.time()
    if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
        suppress_stdout()
    # show the GUI launcher if a model was not provided
    if args.showgui or (not args.model_param and not args.sdmodel and not args.whispermodel and not args.ttsmodel and not args.nomodel):
        #give them a chance to pick a file
        print("For command line arguments, please refer to --help")
        print("***")
        try:
            show_gui()
        except Exception as ex:
            exitcounter = 999
            ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\n\nYou must use the command line instead, e.g. python ./koboldcpp.py --help"
            show_gui_msgbox("Warning, GUI failed to start",ermsg)
            if args.skiplauncher:
                print("Note: In order to use --skiplauncher, you need to specify a model with --model")
            time.sleep(3)
            sys.exit(2)
    if args.model_param and (args.benchmark or args.prompt):
        start_server = False
    #try to read story if provided
    if args.preloadstory:
        global preloaded_story
@ -5644,11 +5626,9 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None):
                    print(f"Error writing benchmark to file: {e}")
            global using_gui_launcher
            if using_gui_launcher and not save_to_file:
-                print("===")
+                global_memory["input_to_exit"] = True
-                print("Press ENTER key to exit.", flush=True)
+                time.sleep(1)
                input()
    check_deprecation_warning()
    if start_server:
        if args.remotetunnel:
            if remote_url:
@ -5663,6 +5643,8 @@ def kcpp_main_process(launch_args, start_server=True, g_memory=None):
            print("Server was not started, main function complete. Idling.", flush=True)
 if __name__ == '__main__':
    import multiprocessing
    multiprocessing.freeze_support()
    def check_range(value_type, min_value, max_value):
        def range_checker(arg: str):
@ -5790,4 +5772,4 @@ if __name__ == '__main__':
    compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
    compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true')
-    main(parser.parse_args(),start_server=True)
+    main(parser.parse_args())