From dfd87c4fb685a94497876aec69dde1d3be1628d7 Mon Sep 17 00:00:00 2001 From: Tai An Date: Wed, 29 Apr 2026 05:20:42 -0700 Subject: [PATCH] feat(router): add --routermodetimeout to make reverse-proxy timeout configurable (#2169) Removes the hardcoded 600s timeout in the router-mode reverse proxy: long generations through --routermode were cut off as soon as the upstream HTTPConnection timed out, regardless of how long the model actually needed, because http.client.HTTPConnection('localhost', upstream_port, timeout=600) was created with a literal 600. Adds a new --routermodetimeout (default 600) under the admin group, and threads it through the three HTTPConnection sites in the router handler: the model-swap reload, the autoswap reload, and the main upstream proxy forward. Behavior is unchanged at the default; users with long generations can now pass e.g. --routermodetimeout 3600. Reported in https://github.com/LostRuins/koboldcpp/issues/2168 --- koboldcpp.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) mode change 100755 => 100644 koboldcpp.py diff --git a/koboldcpp.py b/koboldcpp.py old mode 100755 new mode 100644 index dffd50c5f..53e31acce --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4418,7 +4418,7 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler): } if args.adminpassword: reqheaders["Authorization"] = f"Bearer {args.adminpassword}" - conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600) + conn = http.client.HTTPConnection('localhost', upstream_port, timeout=args.routermodetimeout) conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders) resp = conn.getresponse() time.sleep(3) @@ -4464,7 +4464,7 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler): } if args.adminpassword: reqheaders["Authorization"] = f"Bearer {args.adminpassword}" - conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600) + conn = http.client.HTTPConnection('localhost', upstream_port, 
timeout=args.routermodetimeout) conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders) resp = conn.getresponse() time.sleep(3) @@ -4475,7 +4475,7 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler): time.sleep(0.1) try: # connect upstream - conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600) + conn = http.client.HTTPConnection('localhost', upstream_port, timeout=args.routermodetimeout) conn.request( self.command, self.path, body=body, headers=headers) resp = conn.getresponse() except OSError as e: @@ -11454,6 +11454,7 @@ if __name__ == '__main__': admingroup.add_argument("--admindir", metavar=('[directory]'), help="Specify a directory to look for .kcpps configs in, which can be used to swap models.", default="") admingroup.add_argument("--adminunloadtimeout", help="Set an idle timeout in seconds after which KoboldCpp will automatically unload the current model.", type=int, default=0) admingroup.add_argument("--routermode", help="Router mode uses a reverse proxy router, allowing you to easily hotswap models and configs within a single request. Requires admin mode.", action='store_true') + admingroup.add_argument("--routermodetimeout", metavar=('[seconds]'), help="Timeout in seconds for the router-mode reverse proxy when forwarding requests to the loaded backend (also applies to admin reload calls). Increase this if long generations are being cut off at the proxy. Defaults to 600.", type=int, default=600) admingroup.add_argument("--autoswapmode", help="Autoswap mode builds on router mode to allow switching of model types within the same config automatically. Requires admin mode and router mode. All models desired must be defined within the same config.", action='store_true') admingroup.add_argument("--baseconfig", help="Specify a base .kcpps config to apply, if no custom base config is selected during a model swap", default="")