mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Added CLI chat mode
minor cli fixes (+1 squashed commits) Squashed commits: [60af39a9] Added CLI chat mode
This commit is contained in:
parent
75e7902789
commit
b4a8a5a278
1 changed file with 143 additions and 109 deletions
50
koboldcpp.py
50
koboldcpp.py
|
@ -3529,6 +3529,7 @@ def show_gui():
|
|||
usemlock = ctk.IntVar()
|
||||
debugmode = ctk.IntVar()
|
||||
keepforeground = ctk.IntVar()
|
||||
terminalonly = ctk.IntVar()
|
||||
quietmode = ctk.IntVar(value=0)
|
||||
nocertifymode = ctk.IntVar(value=0)
|
||||
|
||||
|
@ -4020,7 +4021,8 @@ def show_gui():
|
|||
"Use MMAP": [usemmap, "Use mmap to load models if enabled, model will not be unloadable"],
|
||||
"Use mlock": [usemlock, "Enables mlock, preventing the RAM used to load the model from being paged out."],
|
||||
"Debug Mode": [debugmode, "Enables debug mode, with extra info printed to the terminal."],
|
||||
"Keep Foreground": [keepforeground, "Bring KoboldCpp to the foreground every time there is a new generation."]
|
||||
"Keep Foreground": [keepforeground, "Bring KoboldCpp to the foreground every time there is a new generation."],
|
||||
"CLI Terminal Only": [terminalonly, "Does not launch KoboldCpp HTTP server. Instead, enables KoboldCpp from the command line, accepting interactive console input and displaying responses to the terminal."]
|
||||
}
|
||||
|
||||
for idx, (name, properties) in enumerate(hardware_boxes.items()):
|
||||
|
@ -4267,6 +4269,7 @@ def show_gui():
|
|||
args.nofastforward = fastforward.get()==0
|
||||
args.remotetunnel = remotetunnel.get()==1
|
||||
args.foreground = keepforeground.get()==1
|
||||
args.cli = terminalonly.get()==1
|
||||
args.quiet = quietmode.get()==1
|
||||
args.nocertify = nocertifymode.get()==1
|
||||
args.nomodel = nomodel.get()==1
|
||||
|
@ -4425,7 +4428,7 @@ def show_gui():
|
|||
args.ttsgpu = (ttsgpu_var.get()==1)
|
||||
args.ttsmaxlen = int(ttsmaxlen_var.get())
|
||||
|
||||
args.admin = (admin_var.get()==1)
|
||||
args.admin = (admin_var.get()==1 and not args.cli)
|
||||
args.admindir = admin_dir_var.get()
|
||||
args.adminpassword = admin_password_var.get()
|
||||
|
||||
|
@ -4448,6 +4451,7 @@ def show_gui():
|
|||
fastforward.set(0 if "nofastforward" in dict and dict["nofastforward"] else 1)
|
||||
remotetunnel.set(1 if "remotetunnel" in dict and dict["remotetunnel"] else 0)
|
||||
keepforeground.set(1 if "foreground" in dict and dict["foreground"] else 0)
|
||||
terminalonly.set(1 if "cli" in dict and dict["cli"] else 0)
|
||||
quietmode.set(1 if "quiet" in dict and dict["quiet"] else 0)
|
||||
nocertifymode.set(1 if "nocertify" in dict and dict["nocertify"] else 0)
|
||||
nomodel.set(1 if "nomodel" in dict and dict["nomodel"] else 0)
|
||||
|
@ -5289,9 +5293,13 @@ def main(launch_args, default_args):
|
|||
print(f"{KcppVersion}") # just print version and exit
|
||||
return
|
||||
|
||||
#prevent disallowed combos
|
||||
if (args.nomodel or args.benchmark or args.launch or args.admin) and args.cli:
|
||||
exit_with_error(1, "Error: --cli cannot be combined with --launch, --nomodel, --admin or --benchmark")
|
||||
|
||||
args = convert_outdated_args(args)
|
||||
|
||||
temp_hide_print = (args.model_param and args.prompt and not args.benchmark and not (args.debugmode >= 1))
|
||||
temp_hide_print = (args.model_param and (args.prompt and not args.cli) and not args.benchmark and not (args.debugmode >= 1))
|
||||
|
||||
if not temp_hide_print:
|
||||
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}")
|
||||
|
@ -5368,7 +5376,7 @@ def main(launch_args, default_args):
|
|||
print("\nWARNING: Admin was set without selecting an admin directory. Admin cannot be used.\n")
|
||||
|
||||
if not args.admin: #run in single process mode
|
||||
if args.remotetunnel and not args.prompt and not args.benchmark:
|
||||
if args.remotetunnel and not args.prompt and not args.benchmark and not args.cli:
|
||||
setuptunnel(global_memory, True if args.sdmodel else False)
|
||||
kcpp_main_process(args,global_memory,using_gui_launcher)
|
||||
if global_memory["input_to_exit"]:
|
||||
|
@ -5379,7 +5387,7 @@ def main(launch_args, default_args):
|
|||
with multiprocessing.Manager() as mp_manager:
|
||||
global_memory = mp_manager.dict({"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False})
|
||||
|
||||
if args.remotetunnel and not args.prompt and not args.benchmark:
|
||||
if args.remotetunnel and not args.prompt and not args.benchmark and not args.cli:
|
||||
setuptunnel(global_memory, True if args.sdmodel else False)
|
||||
|
||||
# invoke the main koboldcpp process
|
||||
|
@ -5459,10 +5467,10 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
using_gui_launcher = gui_launcher
|
||||
start_time = time.time()
|
||||
|
||||
if args.model_param and args.prompt and not args.benchmark and not (args.debugmode >= 1):
|
||||
if args.model_param and (args.prompt and not args.cli) and not args.benchmark and not (args.debugmode >= 1):
|
||||
suppress_stdout()
|
||||
|
||||
if args.model_param and (args.benchmark or args.prompt):
|
||||
if args.model_param and (args.benchmark or args.prompt or args.cli):
|
||||
start_server = False
|
||||
|
||||
#try to read story if provided
|
||||
|
@ -5985,6 +5993,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
endpoint_url = f"{httpsaffix}://localhost:{args.port}"
|
||||
else:
|
||||
endpoint_url = f"{httpsaffix}://{args.host}:{args.port}"
|
||||
|
||||
if start_server:
|
||||
if not args.remotetunnel:
|
||||
print(f"Starting Kobold API on port {args.port} at {endpoint_url}/api/")
|
||||
print(f"Starting OpenAI Compatible API on port {args.port} at {endpoint_url}/v1/")
|
||||
|
@ -6024,6 +6034,29 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
timer_thread.start()
|
||||
|
||||
if not start_server:
|
||||
if args.cli:
|
||||
print("\n===\nNow running KoboldCpp in Interactive Terminal Chat mode.\nType /quit or /exit to end session.\n")
|
||||
lastturns = []
|
||||
if args.prompt and args.prompt!="":
|
||||
lastturns.append({"role":"system","content":args.prompt})
|
||||
print(f"System Prompt:\n{args.prompt}\n")
|
||||
while True:
|
||||
lastuserinput = input("> ")
|
||||
if lastuserinput=="/quit" or lastuserinput=="/exit":
|
||||
break
|
||||
if not lastuserinput:
|
||||
continue
|
||||
lastturns.append({"role":"user","content":lastuserinput})
|
||||
payload = {"messages":lastturns,"rep_pen":1.07,"temperature":0.8}
|
||||
payload = transform_genparams(payload, 4) #to chat completions
|
||||
suppress_stdout()
|
||||
genout = generate(genparams=payload)
|
||||
restore_stdout()
|
||||
result = genout["text"]
|
||||
if result:
|
||||
lastturns.append({"role":"assistant","content":result})
|
||||
print(result.strip() + "\n", flush=True)
|
||||
else:
|
||||
save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
|
||||
benchmaxctx = maxctx
|
||||
benchlen = args.promptlimit
|
||||
|
@ -6111,7 +6144,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
asyncio.run(RunServerMultiThreaded(args.host, args.port, KcppServerRequestHandler))
|
||||
else:
|
||||
# Flush stdout for previous win32 issue so the client can see output.
|
||||
if not args.prompt or args.benchmark:
|
||||
if not args.prompt or args.benchmark or args.cli:
|
||||
print("Server was not started, main function complete. Idling.", flush=True)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -6169,6 +6202,7 @@ if __name__ == '__main__':
|
|||
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
|
||||
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
|
||||
advparser.add_argument("--prompt", metavar=('[prompt]'), help="Passing a prompt string triggers a direct inference, loading the model, outputs the response to stdout and exits. Can be used alone or with benchmark.", type=str, default="")
|
||||
advparser.add_argument("--cli", help="Does not launch KoboldCpp HTTP server. Instead, enables KoboldCpp from the command line, accepting interactive console input and displaying responses to the terminal.", action='store_true')
|
||||
advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=100)
|
||||
advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
|
||||
advparser.add_argument("--multiplayer", help="Hosts a shared multiplayer session that others can join.", action='store_true')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue