From ea15dfab830d298c75088ea6369d40c003a5aa51 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 16 Mar 2026 23:56:34 +0800 Subject: [PATCH] added auto unload for admin mode --- koboldcpp.py | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 07ea51bf5..2690d9845 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -73,7 +73,7 @@ extra_images_max = 4 # for kontext/qwen img KcppVersion = "1.110" showdebug = True kcpp_instance = None #global running instance -global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""} +global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":"", "last_active_timestamp":datetime.now(),"current_model":"initial_model"} using_gui_launcher = False handle = None @@ -3633,7 +3633,6 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler): protocol_version = "HTTP/1.1" HOP_BY_HOP = { "connection", "keep-alive", "proxy-authenticate", "proxy-authorization", "te", "trailers", "transfer-encoding", "upgrade" } STREAM_CHUNK = 512 - current_model = "initial_model" def log_message(self, fmt, *args): global showdebug @@ -3677,6 +3676,7 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler): headers[k] = v headers["Connection"] = "close" + global global_memory #specifically look for generation requests from completions or chat completions to handle hotswap is_post = self.command.upper() == "POST" is_completions_path = (self.path.endswith('/v1/completions') or self.path.endswith('/v1/completion') or self.path=='/completions') @@ -3690,9 +3690,10 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler): except Exception: pass - if model_name and model_name != type(self).current_model: + if model_name and model_name != global_memory["current_model"]: with proxy_reload_lock: - if model_name != type(self).current_model: + if model_name != global_memory["current_model"]: + global_memory["last_active_timestamp"] = datetime.now() whitelist = get_current_admindir_list() # see if its an allowed swap if model_name in whitelist: reqbody = json.dumps({"filename":model_name}) @@ -3706,12 +3707,11 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler): conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders) resp = conn.getresponse() time.sleep(3) + global_memory["last_active_timestamp"] = datetime.now() if not self.wait_for_upstream_ready(upstream_port,120,0.5): self.send_error(504, "KoboldCpp model swap reload timed out") return time.sleep(0.1) - type(self).current_model = model_name - time.sleep(0.1) try: # connect upstream conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600) @@ -5015,18 +5015,7 @@ Change Mode
resp = {"success": True} else: dirpath = os.path.abspath(args.admindir) - valid_exts = (".kcpps", ".kcppt", ".gguf") - allowed_files = [] - with os.scandir(dirpath) as entries: # Scan top-level and 1-level deep - for entry in entries: - if entry.is_file() and entry.name.lower().endswith(valid_exts): - allowed_files.append(entry.name) - elif entry.is_dir(): - subdir_name = entry.name - with os.scandir(entry.path) as subentries: - for subentry in subentries: - if subentry.is_file() and subentry.name.lower().endswith(valid_exts): - allowed_files.append(os.path.join(subdir_name, subentry.name)) + allowed_files = get_current_admindir_list() # Normalize requested target path targetfilepath = os.path.abspath(os.path.join(dirpath, targetfile)) @@ -5177,7 +5166,7 @@ Change Mode
is_music_audio = False response_body = None use_jinja = args.jinja - + global_memory["last_active_timestamp"] = datetime.now() if self.path.endswith('/api/admin/check_state'): if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword): cur_states = [] @@ -6276,6 +6265,7 @@ def show_gui(): admin_password_var = ctk.StringVar() singleinstance_var = ctk.IntVar(value=0) router_mode_var = ctk.IntVar(value=0) + admin_unload_timeout_var = ctk.StringVar(value=str(0)) nozenity_var = ctk.IntVar(value=0) @@ -7089,6 +7079,7 @@ def show_gui(): makecheckbox(admin_tab, "Enable Model Administration", admin_var, 1, 0, command=toggleadmin,tooltiptxt="Enable a admin server, allowing you to remotely relaunch and swap models and configs.") makelabelentry(admin_tab, "Admin Password:" , admin_password_var, 3, 150,padx=(120),singleline=True,tooltip="Require a password to access admin functions. You are strongly advised to use one for publically accessible instances!") makefileentry(admin_tab, "Config Directory (Required):", "Select directory containing .gguf or .kcpps files to relaunch from", admin_dir_var, 5, width=280, dialog_type=2, tooltiptxt="Specify a directory to look for .kcpps configs in, which can be used to swap models.") + makelabelentry(admin_tab, "Auto Unload Timeout:" , admin_unload_timeout_var, 7, 70,padx=(150),singleline=True,tooltip="Set an idle timeout in seconds after which KoboldCpp will automatically unload the current model.") makecheckbox(admin_tab, "SingleInstance Mode", singleinstance_var, 10, 0,tooltiptxt="Allows this server to be shut down by another KoboldCpp instance with singleinstance starting on the same port.") router_mode_box = makecheckbox(admin_tab, "Router Mode", router_mode_var, 15, 0,tooltiptxt="Router mode uses a reverse proxy router, allowing you to easily hotswap models and configs within a single request. Requires admin mode.") @@ -7406,6 +7397,7 @@ def show_gui(): args.adminpassword = admin_password_var.get() args.singleinstance = (singleinstance_var.get()==1) args.routermode = router_mode_var.get()==1 + args.adminunloadtimeout = (0 if admin_unload_timeout_var.get()=="" else int(admin_unload_timeout_var.get())) args.showgui = False #prevent showgui from leaking into configs, its cli only def import_vars(dict): @@ -7661,6 +7653,7 @@ def show_gui(): router_mode_var.set(dict["routermode"] if ("routermode" in dict) else 0) admin_dir_var.set(dict["admindir"] if ("admindir" in dict and dict["admindir"]) else "") admin_password_var.set(dict["adminpassword"] if ("adminpassword" in dict and dict["adminpassword"]) else "") + admin_unload_timeout_var.set(dict["adminunloadtimeout"] if ("adminunloadtimeout" in dict and dict["adminunloadtimeout"]) else 0) singleinstance_var.set(dict["singleinstance"] if ("singleinstance" in dict) else 0) importvars_in_progress = False @@ -8178,7 +8171,7 @@ def reload_from_new_args(newargs): args.istemplate = False newargs = convert_invalid_args(newargs) for key, value in newargs.items(): #do not overwrite certain values - if key not in ["remotetunnel","showgui","port","host","port_param","admin","adminpassword","admindir","ssl","nocertify","benchmark","prompt","config","downloaddir"]: + if key not in ["remotetunnel","showgui","port","host","port_param","admin","adminpassword","password","adminunloadtimeout","routermode","admindir","ssl","nocertify","benchmark","prompt","config","downloaddir"]: setattr(args, key, value) setattr(args,"showgui",False) setattr(args,"benchmark",False) @@ -8706,7 +8699,7 @@ def main(launch_args, default_args): input() else: # manager command queue for admin mode with multiprocessing.Manager() as mp_manager: - global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}) + global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":"", "last_active_timestamp":datetime.now(),"current_model":"initial_model"}) if args.remotetunnel and not args.prompt and not args.benchmark and not args.cli: setuptunnel(global_memory, True if args.sdmodel else False) @@ -8746,6 +8739,14 @@ def main(launch_args, default_args): fault_recovery_mode = False restart_target = global_memory["restart_target"] restart_override_config_target = global_memory["restart_override_config_target"] + last_active = global_memory["last_active_timestamp"] + if last_active and args.adminunloadtimeout>0: + curtime = datetime.now() + elapsedtime = curtime - last_active + time_since_last_active = elapsedtime.total_seconds() + if time_since_last_active > args.adminunloadtimeout and global_memory["current_model"]!="unload_model": + print(f"[Unload Timeout] Inactive for over {time_since_last_active}s, unloading models...") + restart_target = "unload_model" if restart_target!="": overridetxt = ("" if not restart_override_config_target else f" with override config {restart_override_config_target}") print(f"Reloading new model/config: {restart_target}{overridetxt}") @@ -8791,6 +8792,7 @@ def main(launch_args, default_args): kcpp_instance.start() global_memory["restart_target"] = "" global_memory["restart_override_config_target"] = "" + global_memory["current_model"] = restart_target time.sleep(3) else: time.sleep(0.2) @@ -9958,6 +9960,7 @@ if __name__ == '__main__': admingroup.add_argument("--admin", help="Enables admin mode, allowing you to unload and reload different configurations or models.", action='store_true') admingroup.add_argument("--adminpassword", metavar=('[password]'), help="Require a password to access admin functions. You are strongly advised to use one for publically accessible instances!", default=None) admingroup.add_argument("--admindir", metavar=('[directory]'), help="Specify a directory to look for .kcpps configs in, which can be used to swap models.", default="") + admingroup.add_argument("--adminunloadtimeout", help="Set an idle timeout in seconds after which KoboldCpp will automatically unload the current model.", type=int, default=0) admingroup.add_argument("--routermode", help="Router mode uses a reverse proxy router, allowing you to easily hotswap models and configs within a single request. Requires admin mode.", action='store_true') deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!')