wip router mode

This commit is contained in:
Concedo 2026-03-14 17:07:05 +08:00
parent 1d067933f0
commit 8b9594b6ea

View file

@ -3529,11 +3529,101 @@ def LaunchWebbrowser(target_url, failedmsg):
def get_my_epurl():
    """Return the base URL (scheme, host and port) that clients should use.

    The scheme is ``https`` when the module-level ``sslvalid`` flag is truthy,
    otherwise ``http``. The host is ``args.host`` when it is non-empty,
    otherwise ``localhost``. The port is ``args.proxy_port`` when that is set
    to a truthy value, otherwise ``args.port``.
    """
    scheme = "https" if sslvalid else "http"
    # proxy_port may be unset on args; fall back to the regular port in that case.
    displayedport = getattr(args, "proxy_port", None) or args.port
    host = args.host if args.host != "" else "localhost"
    return f"{scheme}://{host}:{displayedport}"
###########################################################
### A simple reverse proxy used in Kcpp Router mode ###
###########################################################
class KcppProxyHandler(http.server.BaseHTTPRequestHandler):
    """Minimal reverse-proxy request handler used in KoboldCpp router mode.

    Every supported HTTP method is routed to ``_handle``, which replays the
    request against ``localhost:<self.server.upstream_port>`` and relays the
    upstream response back to the client. Each exchange uses a fresh upstream
    connection, and the client connection is closed once the response ends.
    """
    sys_version = "1"
    server_version = "KoboldCppServer"
    protocol_version = "HTTP/1.1"
    # Headers that only describe a single hop; these are never forwarded.
    HOP_BY_HOP = { "connection", "keep-alive", "proxy-authenticate", "proxy-authorization", "te", "trailers", "transfer-encoding", "upgrade" }
    # Upper bound on the number of bytes relayed per read while streaming.
    STREAM_CHUNK = 512

    def log_message(self, fmt, *args):
        """Print a log line only when the global ``showdebug`` flag is truthy."""
        global showdebug
        if showdebug:
            print(f"[proxy] {self.address_string()} {fmt % args}", flush=True)

    def _handle(self):
        """Forward the current request upstream and relay the response.

        Replies with 400 when ``Content-Length`` is not a non-negative
        integer, and with 502 when the upstream request fails before a
        response is received. Errors raised after the response has started
        are logged and end the response early.
        """
        upstream_port = self.server.upstream_port
        length = self.headers.get("Content-Length") # read request body
        body = None
        if length:
            try:
                body_len = int(length)
                if body_len < 0:
                    # a negative size would make rfile.read() read until EOF
                    raise ValueError(length)
            except ValueError:
                self.send_error(400, "Invalid Content-Length header")
                return
            body = self.rfile.read(body_len)
        # NOTE(review): a request body sent with Transfer-Encoding: chunked and
        # no Content-Length is not read here, so it is not forwarded.
        headers = {k: v for k, v in self.headers.items() if k.lower() not in self.HOP_BY_HOP} # forward headers
        headers["Connection"] = "close"
        # maybe_stall_for_model_swap(self.path, request_body)
        conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
        try: # connect upstream
            conn.request(self.command, self.path, body=body, headers=headers)
            resp = conn.getresponse()
        except (OSError, http.client.HTTPException) as e:
            conn.close() # do not leak the upstream socket on failure
            self.send_error(502, f"KoboldCpp proxy connection failed: {e}")
            return
        try:
            self.send_response(resp.status, resp.reason) # forward response headers
            # send_response() already emitted Server and Date, so the upstream
            # copies are dropped to avoid sending those headers twice.
            skipped = self.HOP_BY_HOP | {"server", "date"}
            for k, v in resp.getheaders():
                if k.lower() not in skipped:
                    self.send_header(k, v)
            # The body may be delimited by closing the connection (any upstream
            # Transfer-Encoding header was dropped), so tell the client explicitly.
            self.send_header("Connection", "close")
            self.end_headers()
            self.close_connection = True
            while True: # stream response
                # read1() returns as soon as some data is available instead of
                # waiting for a full STREAM_CHUNK, keeping streamed output prompt.
                chunk = resp.read1(self.STREAM_CHUNK)
                if not chunk:
                    break
                self.wfile.write(chunk)
                self.wfile.flush()
        except (OSError, http.client.HTTPException) as e:
            # The response has already started, so nothing can be reported to
            # the client any more; record the failure and drop the connection.
            self.close_connection = True
            self.log_error("proxy stream aborted: %s", e)
        finally:
            conn.close()

    # proxy all HTTP methods
    do_GET = _handle
    do_POST = _handle
    do_PUT = _handle
    do_DELETE = _handle
    do_PATCH = _handle
    do_OPTIONS = _handle
    do_HEAD = _handle
class KcppProxyHttpServer(http.server.HTTPServer):
    """HTTP server for the router proxy that handles each request on its own daemon thread.

    The port of the upstream server is stored on the instance as
    ``upstream_port`` before the base class constructor runs.
    """

    def __init__(self, server_address, RequestHandlerClass, upstream_port):
        """Record ``upstream_port`` and then initialise the underlying HTTPServer."""
        self.upstream_port = upstream_port
        super().__init__(server_address, RequestHandlerClass)

    def process_request(self, request, client_address):
        """Hand the accepted request to ``_worker`` on a new daemon thread."""
        threading.Thread(
            target=self._worker,
            args=(request, client_address),
            daemon=True,
        ).start()

    def _worker(self, request, client_address):
        """Run the request handler, then always shut the request down."""
        try:
            self.finish_request(request, client_address)
        finally:
            self.shutdown_request(request)
def run_router_proxy(proxy_port, upstream_port):
    """Start the router-mode reverse proxy on a background daemon thread.

    Creates a KcppProxyHttpServer bound to ``("", proxy_port)`` with
    KcppProxyHandler as its request handler and ``upstream_port`` as the
    forwarding target, then runs ``serve_forever`` on a daemon thread.

    Returns the server object so the caller can shut it down later.
    """
    proxy_server = KcppProxyHttpServer(("", proxy_port), KcppProxyHandler, upstream_port)
    print(f"KoboldCpp Proxy starting on port {proxy_port}, forwarding to port {upstream_port}",flush=True)
    threading.Thread(target=proxy_server.serve_forever, daemon=True).start()
    return proxy_server
#################################################################
### A hacky simple HTTP server simulating a kobold api by Concedo
### we are intentionally NOT using flask, because we want MINIMAL dependencies
@ -5778,6 +5868,7 @@ def show_gui():
togglehorde(1,1,1)
toggletaesd(1,1,1)
togglejinja(1,1,1)
toggleadmin(1,1,1)
tabbuttonaction(tabnames[curr_tab_idx])
pass
def on_resize(event):
@ -6010,6 +6101,7 @@ def show_gui():
admin_dir_var = ctk.StringVar()
admin_password_var = ctk.StringVar()
singleinstance_var = ctk.IntVar(value=0)
router_mode_var = ctk.IntVar(value=0)
nozenity_var = ctk.IntVar(value=0)
@ -6816,10 +6908,15 @@ def show_gui():
autopath = sys.executable
autopath = os.path.dirname(autopath)
admin_dir_var.set(autopath)
if admin_var.get()==1:
router_mode_box.grid()
else:
router_mode_box.grid_remove()
makecheckbox(admin_tab, "Enable Model Administration", admin_var, 1, 0, command=toggleadmin,tooltiptxt="Enable a admin server, allowing you to remotely relaunch and swap models and configs.")
makelabelentry(admin_tab, "Admin Password:" , admin_password_var, 3, 150,padx=(120),singleline=True,tooltip="Require a password to access admin functions. You are strongly advised to use one for publically accessible instances!")
makefileentry(admin_tab, "Config Directory (Required):", "Select directory containing .gguf or .kcpps files to relaunch from", admin_dir_var, 5, width=280, dialog_type=2, tooltiptxt="Specify a directory to look for .kcpps configs in, which can be used to swap models.")
makecheckbox(admin_tab, "SingleInstance Mode", singleinstance_var, 10, 0,tooltiptxt="Allows this server to be shut down by another KoboldCpp instance with singleinstance starting on the same port.")
router_mode_box = makecheckbox(admin_tab, "Router Mode", router_mode_var, 15, 0,tooltiptxt="Router mode uses a reverse proxy router, allowing you to easily hotswap models and configs within a single request. Requires admin mode.")
def kcpp_export_template():
nonlocal kcpp_exporting_template
@ -6893,6 +6990,7 @@ def show_gui():
togglectxshift(1,1,1)
togglehorde(1,1,1)
togglejinja(1,1,1)
toggleadmin(1,1,1)
# launch
def guilaunch():
@ -7132,6 +7230,7 @@ def show_gui():
args.admindir = admin_dir_var.get()
args.adminpassword = admin_password_var.get()
args.singleinstance = (singleinstance_var.get()==1)
args.routermode = router_mode_var.get()==1
args.showgui = False #prevent showgui from leaking into configs, its cli only
def import_vars(dict):
@ -7381,6 +7480,7 @@ def show_gui():
embeddings_gpu_var.set(dict["embeddingsgpu"] if ("embeddingsgpu" in dict) else 0)
admin_var.set(dict["admin"] if ("admin" in dict) else 0)
router_mode_var.set(dict["routermode"] if ("routermode" in dict) else 0)
admin_dir_var.set(dict["admindir"] if ("admindir" in dict and dict["admindir"]) else "")
admin_password_var.set(dict["adminpassword"] if ("adminpassword" in dict and dict["adminpassword"]) else "")
singleinstance_var.set(dict["singleinstance"] if ("singleinstance" in dict) else 0)
@ -7785,6 +7885,8 @@ def convert_invalid_args(args):
dict["model_param"] = model_value
elif isinstance(model_value, list) and model_value: # Non-empty list
dict["model_param"] = model_value[0] # Take the first file in the list
if ("port_param" in dict and dict["port_param"] and dict["port_param"]!=defaultport):
dict["port"] = dict["port_param"]
if "sdnotile" in dict and "sdtiledvae" not in dict:
dict["sdtiledvae"] = (0 if (dict["sdnotile"]) else default_vae_tile_threshold) # convert legacy option
if 'sdquant' in dict and type(dict['sdquant']) is bool:
@ -7832,13 +7934,14 @@ def setuptunnel(global_memory, has_sd):
tunnelbinary = "./cloudflared-linux-amd64"
tunnelproc = None
displayedport = (args.port if not args.proxy_port else args.proxy_port)
if sys.platform == "linux":
clean_env = os.environ.copy()
clean_env.pop("LD_LIBRARY_PATH", None)
clean_env["PATH"] = "/usr/bin:/bin"
tunnelproc = subprocess.Popen(f"{tunnelbinary} tunnel --url {httpsaffix}://localhost:{int(args.port)}{ssladd}", text=True, encoding='utf-8', shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, env=clean_env)
tunnelproc = subprocess.Popen(f"{tunnelbinary} tunnel --url {httpsaffix}://localhost:{int(displayedport)}{ssladd}", text=True, encoding='utf-8', shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, env=clean_env)
else:
tunnelproc = subprocess.Popen(f"{tunnelbinary} tunnel --url {httpsaffix}://localhost:{int(args.port)}{ssladd}", text=True, encoding='utf-8', shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
tunnelproc = subprocess.Popen(f"{tunnelbinary} tunnel --url {httpsaffix}://localhost:{int(displayedport)}{ssladd}", text=True, encoding='utf-8', shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
time.sleep(10)
def tunnel_reader():
@ -8380,9 +8483,29 @@ def main(launch_args, default_args):
if len(args.ssl)==2 and isinstance(args.ssl[0], str) and os.path.exists(args.ssl[0]) and isinstance(args.ssl[1], str) and os.path.exists(args.ssl[1]):
sslvalid = True
args.proxy_port = None #normally unused
if args.routermode:
if not args.admin:
print("\nWARNING: Router mode requires admin, enabling admin...")
args.admin = True
# setup router mode, find a usable high port swap the port
newport = 15001
for prt in range(15001,15011):
if not is_port_in_use(prt):
newport = prt
break
args.proxy_port = args.port_param
args.port = args.port_param = newport
run_router_proxy(args.proxy_port,newport)
if args.admin and not args.admindir:
args.admin = False
print("\nWARNING: Admin was set without selecting an admin directory. Admin cannot be used.\n")
print("\nWARNING: Admin was set without selecting an admin directory. Selecting current executable directory...")
autopath = os.path.realpath(__file__)
if getattr(sys, 'frozen', False):
autopath = sys.executable
autopath = os.path.dirname(autopath)
args.admindir = autopath
print(f"Admin Directory Set: {autopath}\n")
if not args.admin: #run in single process mode
if args.remotetunnel and not args.prompt and not args.benchmark and not args.cli:
@ -8769,14 +8892,12 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
args.defaultgenamt = max(64, min(args.defaultgenamt, 8192))
args.defaultgenamt = min(args.defaultgenamt, maxctx / 2)
if args.port_param!=defaultport:
args.port = args.port_param
if start_server and args.singleinstance and is_port_in_use(args.port):
displayedport = (args.port if not args.proxy_port else args.proxy_port)
if start_server and args.singleinstance and is_port_in_use(displayedport):
try:
print(f"Warning: Port {args.port} already appears to be in use by another program.")
print(f"Attempting to request shutdown of previous instance on port {args.port}...")
shutdownreq = make_url_request(f'http://localhost:{args.port}/api/extra/shutdown',{},timeout=5)
print(f"Warning: Port {displayedport} already appears to be in use by another program.")
print(f"Attempting to request shutdown of previous instance on port {displayedport}...")
shutdownreq = make_url_request(f'http://localhost:{displayedport}/api/extra/shutdown',{},timeout=5)
shutdownok = (shutdownreq and "success" in shutdownreq and shutdownreq["success"] is True)
time.sleep(2)
print("Shutdown existing successful!" if shutdownok else "Shutdown existing failed!")
@ -9250,15 +9371,16 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
endpoint_url = ""
remote_url = ""
httpsaffix = ("https" if sslvalid else "http")
displayedport = (args.port if not args.proxy_port else args.proxy_port)
if args.host=="":
endpoint_url = f"{httpsaffix}://localhost:{args.port}"
endpoint_url = f"{httpsaffix}://localhost:{displayedport}"
else:
endpoint_url = f"{httpsaffix}://{args.host}:{args.port}"
endpoint_url = f"{httpsaffix}://{args.host}:{displayedport}"
if start_server:
if not args.remotetunnel:
print(f"Starting Kobold API on port {args.port} at {endpoint_url}/api/")
print(f"Starting OpenAI Compatible API on port {args.port} at {endpoint_url}/v1/")
print(f"Starting Kobold API on port {displayedport} at {endpoint_url}/api/")
print(f"Starting OpenAI Compatible API on port {displayedport} at {endpoint_url}/v1/")
print(f"Starting llama.cpp secondary WebUI at {endpoint_url}/lcpp/")
if args.sdmodel:
print(f"StableUI is available at {endpoint_url}/sdui/")
@ -9586,6 +9708,7 @@ if __name__ == '__main__':
admingroup.add_argument("--admin", help="Enables admin mode, allowing you to unload and reload different configurations or models.", action='store_true')
admingroup.add_argument("--adminpassword", metavar=('[password]'), help="Require a password to access admin functions. You are strongly advised to use one for publically accessible instances!", default=None)
admingroup.add_argument("--admindir", metavar=('[directory]'), help="Specify a directory to look for .kcpps configs in, which can be used to swap models.", default="")
admingroup.add_argument("--routermode", help="Router mode uses a reverse proxy router, allowing you to easily hotswap models and configs within a single request. Requires admin mode.", action='store_true')
deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!')
deprecatedgroup.add_argument("--hordeconfig", help=argparse.SUPPRESS, nargs='+')