mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-30 12:40:29 +00:00
server: add auto-sleep after N seconds of idle (#18228)
* implement sleeping at queue level * implement server-context suspend * add test * add docs * optimization: add fast path * make sure to free llama_init * nits * fix use-after-free * allow /models to be accessed during sleeping, fix use-after-free * don't allow accessing /models during sleep, it is not thread-safe * fix data race on accessing props and model_meta * small clean up * trailing whitespace * rm outdated comments
This commit is contained in:
parent
52ab19df63
commit
ddcb75dd8a
12 changed files with 355 additions and 122 deletions
|
|
@ -100,6 +100,7 @@ class ServerProcess:
|
|||
server_path: str | None = None
|
||||
mmproj_url: str | None = None
|
||||
media_path: str | None = None
|
||||
sleep_idle_seconds: int | None = None
|
||||
|
||||
# session variables
|
||||
process: subprocess.Popen | None = None
|
||||
|
|
@ -230,6 +231,8 @@ class ServerProcess:
|
|||
server_args.extend(["--mmproj-url", self.mmproj_url])
|
||||
if self.media_path:
|
||||
server_args.extend(["--media-path", self.media_path])
|
||||
if self.sleep_idle_seconds is not None:
|
||||
server_args.extend(["--sleep-idle-seconds", self.sleep_idle_seconds])
|
||||
|
||||
args = [str(arg) for arg in [server_path, *server_args]]
|
||||
print(f"tests: starting server with: {' '.join(args)}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue