feat(router): add --routermodetimeout to make reverse-proxy timeout configurable (#2169)

Removes the hardcoded 600s timeout in the router-mode reverse proxy.
Previously, long generations through --routermode were cut off at the
upstream HTTPConnection timeout regardless of how long the model actually
took, because http.client.HTTPConnection('localhost', upstream_port, timeout=600)
was wired with a literal 600.

Adds a new --routermodetimeout option (default 600 seconds) to the admin
argument group, and threads it through the three HTTPConnection sites in the
router handler: the model-swap reload, the autoswap reload, and the main
upstream proxy forward. Behavior is unchanged at the default; users with long
generations can now pass, e.g., --routermodetimeout 3600.

Reported in https://github.com/LostRuins/koboldcpp/issues/2168
This commit is contained in:
Tai An 2026-04-29 05:20:42 -07:00 committed by GitHub
parent 70be589894
commit dfd87c4fb6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

7
koboldcpp.py Executable file → Normal file
View file

@@ -4418,7 +4418,7 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler):
}
if args.adminpassword:
reqheaders["Authorization"] = f"Bearer {args.adminpassword}"
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=args.routermodetimeout)
conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders)
resp = conn.getresponse()
time.sleep(3)
@@ -4464,7 +4464,7 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler):
}
if args.adminpassword:
reqheaders["Authorization"] = f"Bearer {args.adminpassword}"
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=args.routermodetimeout)
conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders)
resp = conn.getresponse()
time.sleep(3)
@@ -4475,7 +4475,7 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler):
time.sleep(0.1)
try: # connect upstream
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=args.routermodetimeout)
conn.request( self.command, self.path, body=body, headers=headers)
resp = conn.getresponse()
except OSError as e:
@@ -11454,6 +11454,7 @@ if __name__ == '__main__':
admingroup.add_argument("--admindir", metavar=('[directory]'), help="Specify a directory to look for .kcpps configs in, which can be used to swap models.", default="")
admingroup.add_argument("--adminunloadtimeout", help="Set an idle timeout in seconds after which KoboldCpp will automatically unload the current model.", type=int, default=0)
admingroup.add_argument("--routermode", help="Router mode uses a reverse proxy router, allowing you to easily hotswap models and configs within a single request. Requires admin mode.", action='store_true')
admingroup.add_argument("--routermodetimeout", metavar=('[seconds]'), help="Timeout in seconds for the router-mode reverse proxy when forwarding requests to the loaded backend (also applies to admin reload calls). Increase this if long generations are being cut off at the proxy. Defaults to 600.", type=int, default=600)
admingroup.add_argument("--autoswapmode", help="Autoswap mode builds on router mode to allow switching of model types within the same config automatically. Requires admin mode and router mode. All models desired must be defined within the same config.", action='store_true')
admingroup.add_argument("--baseconfig", help="Specify a base .kcpps config to apply, if no custom base config is selected during a model swap", default="")