Added CLI chat mode

Minor CLI fixes (+1 squashed commit)

Squashed commits:

[60af39a9] Added CLI chat mode
This commit is contained in:
Concedo 2025-03-26 19:05:38 +08:00
parent 75e7902789
commit b4a8a5a278

View file

@ -3529,6 +3529,7 @@ def show_gui():
usemlock = ctk.IntVar()
debugmode = ctk.IntVar()
keepforeground = ctk.IntVar()
terminalonly = ctk.IntVar()
quietmode = ctk.IntVar(value=0)
nocertifymode = ctk.IntVar(value=0)
@ -4020,7 +4021,8 @@ def show_gui():
"Use MMAP": [usemmap, "Use mmap to load models if enabled, model will not be unloadable"],
"Use mlock": [usemlock, "Enables mlock, preventing the RAM used to load the model from being paged out."],
"Debug Mode": [debugmode, "Enables debug mode, with extra info printed to the terminal."],
"Keep Foreground": [keepforeground, "Bring KoboldCpp to the foreground every time there is a new generation."]
"Keep Foreground": [keepforeground, "Bring KoboldCpp to the foreground every time there is a new generation."],
"CLI Terminal Only": [terminalonly, "Does not launch KoboldCpp HTTP server. Instead, enables KoboldCpp from the command line, accepting interactive console input and displaying responses to the terminal."]
}
for idx, (name, properties) in enumerate(hardware_boxes.items()):
@ -4267,6 +4269,7 @@ def show_gui():
args.nofastforward = fastforward.get()==0
args.remotetunnel = remotetunnel.get()==1
args.foreground = keepforeground.get()==1
args.cli = terminalonly.get()==1
args.quiet = quietmode.get()==1
args.nocertify = nocertifymode.get()==1
args.nomodel = nomodel.get()==1
@ -4425,7 +4428,7 @@ def show_gui():
args.ttsgpu = (ttsgpu_var.get()==1)
args.ttsmaxlen = int(ttsmaxlen_var.get())
args.admin = (admin_var.get()==1)
args.admin = (admin_var.get()==1 and not args.cli)
args.admindir = admin_dir_var.get()
args.adminpassword = admin_password_var.get()
@ -4448,6 +4451,7 @@ def show_gui():
fastforward.set(0 if "nofastforward" in dict and dict["nofastforward"] else 1)
remotetunnel.set(1 if "remotetunnel" in dict and dict["remotetunnel"] else 0)
keepforeground.set(1 if "foreground" in dict and dict["foreground"] else 0)
terminalonly.set(1 if "cli" in dict and dict["cli"] else 0)
quietmode.set(1 if "quiet" in dict and dict["quiet"] else 0)
nocertifymode.set(1 if "nocertify" in dict and dict["nocertify"] else 0)
nomodel.set(1 if "nomodel" in dict and dict["nomodel"] else 0)
@ -5289,9 +5293,13 @@ def main(launch_args, default_args):
print(f"{KcppVersion}") # just print version and exit
return
#prevent disallowed combos
if (args.nomodel or args.benchmark or args.launch or args.admin) and args.cli:
exit_with_error(1, "Error: --cli cannot be combined with --launch, --nomodel, --admin or --benchmark")
args = convert_outdated_args(args)
temp_hide_print = (args.model_param and args.prompt and not args.benchmark and not (args.debugmode >= 1))
temp_hide_print = (args.model_param and (args.prompt and not args.cli) and not args.benchmark and not (args.debugmode >= 1))
if not temp_hide_print:
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}")
@ -5368,7 +5376,7 @@ def main(launch_args, default_args):
print("\nWARNING: Admin was set without selecting an admin directory. Admin cannot be used.\n")
if not args.admin: #run in single process mode
if args.remotetunnel and not args.prompt and not args.benchmark:
if args.remotetunnel and not args.prompt and not args.benchmark and not args.cli:
setuptunnel(global_memory, True if args.sdmodel else False)
kcpp_main_process(args,global_memory,using_gui_launcher)
if global_memory["input_to_exit"]:
@ -5379,7 +5387,7 @@ def main(launch_args, default_args):
with multiprocessing.Manager() as mp_manager:
global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False})
if args.remotetunnel and not args.prompt and not args.benchmark:
if args.remotetunnel and not args.prompt and not args.benchmark and not args.cli:
setuptunnel(global_memory, True if args.sdmodel else False)
# invoke the main koboldcpp process
@ -5459,10 +5467,10 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
using_gui_launcher = gui_launcher
start_time = time.time()
if args.model_param and args.prompt and not args.benchmark and not (args.debugmode >= 1):
if args.model_param and (args.prompt and not args.cli) and not args.benchmark and not (args.debugmode >= 1):
suppress_stdout()
if args.model_param and (args.benchmark or args.prompt):
if args.model_param and (args.benchmark or args.prompt or args.cli):
start_server = False
#try to read story if provided
@ -5985,6 +5993,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
endpoint_url = f"{httpsaffix}://localhost:{args.port}"
else:
endpoint_url = f"{httpsaffix}://{args.host}:{args.port}"
if start_server:
if not args.remotetunnel:
print(f"Starting Kobold API on port {args.port} at {endpoint_url}/api/")
print(f"Starting OpenAI Compatible API on port {args.port} at {endpoint_url}/v1/")
@ -6024,6 +6034,29 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
timer_thread.start()
if not start_server:
if args.cli:
print("\n===\nNow running KoboldCpp in Interactive Terminal Chat mode.\nType /quit or /exit to end session.\n")
lastturns = []
if args.prompt and args.prompt!="":
lastturns.append({"role":"system","content":args.prompt})
print(f"System Prompt:\n{args.prompt}\n")
while True:
lastuserinput = input("> ")
if lastuserinput=="/quit" or lastuserinput=="/exit":
break
if not lastuserinput:
continue
lastturns.append({"role":"user","content":lastuserinput})
payload = {"messages":lastturns,"rep_pen":1.07,"temperature":0.8}
payload = transform_genparams(payload, 4) #to chat completions
suppress_stdout()
genout = generate(genparams=payload)
restore_stdout()
result = genout["text"]
if result:
lastturns.append({"role":"assistant","content":result})
print(result.strip() + "\n", flush=True)
else:
save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
benchmaxctx = maxctx
benchlen = args.promptlimit
@ -6111,7 +6144,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
asyncio.run(RunServerMultiThreaded(args.host, args.port, KcppServerRequestHandler))
else:
# Flush stdout for previous win32 issue so the client can see output.
if not args.prompt or args.benchmark:
if not args.prompt or args.benchmark or args.cli:
print("Server was not started, main function complete. Idling.", flush=True)
if __name__ == '__main__':
@ -6169,6 +6202,7 @@ if __name__ == '__main__':
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
advparser.add_argument("--prompt", metavar=('[prompt]'), help="Passing a prompt string triggers a direct inference, loading the model, outputs the response to stdout and exits. Can be used alone or with benchmark.", type=str, default="")
advparser.add_argument("--cli", help="Does not launch KoboldCpp HTTP server. Instead, enables KoboldCpp from the command line, accepting interactive console input and displaying responses to the terminal.", action='store_true')
advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=100)
advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
advparser.add_argument("--multiplayer", help="Hosts a shared multiplayer session that others can join.", action='store_true')