mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
wip prompt
This commit is contained in:
parent
c23d91987a
commit
853d57c53c
1 changed files with 101 additions and 52 deletions
63
koboldcpp.py
63
koboldcpp.py
|
@ -74,6 +74,13 @@ currfinishreason = "null"
|
||||||
using_gui_launcher = False
|
using_gui_launcher = False
|
||||||
using_outdated_flags = False
|
using_outdated_flags = False
|
||||||
|
|
||||||
|
saved_stdout = None
|
||||||
|
saved_stderr = None
|
||||||
|
saved_stdout_py = None
|
||||||
|
saved_stderr_py = None
|
||||||
|
stdout_nullfile = None
|
||||||
|
stdout_nullfile_py = None
|
||||||
|
|
||||||
CLDevices = ["1","2","3","4"]
|
CLDevices = ["1","2","3","4"]
|
||||||
CUDevices = ["1","2","3","4","All"]
|
CUDevices = ["1","2","3","4","All"]
|
||||||
CLDevicesNames = ["","","",""]
|
CLDevicesNames = ["","","",""]
|
||||||
|
@ -224,6 +231,34 @@ def getabspath():
|
||||||
def file_exists(filename):
    """Return True if *filename* exists under the directory reported by getdirpath()."""
    candidate = os.path.join(getdirpath(), filename)
    return os.path.exists(candidate)
|
||||||
|
|
||||||
|
def suppress_stdout():
    """Silence stdout and stderr at both the OS-descriptor and Python level.

    Duplicates the descriptors behind ``sys.stdout`` / ``sys.stderr`` and keeps
    the original Python stream objects, then points the descriptors and the
    ``sys`` streams at ``os.devnull``. Everything needed to undo this is stored
    in module globals for ``restore_stdout()``.

    Does nothing if any piece of saved state is already set, i.e. output is
    already suppressed.
    """
    global saved_stdout, saved_stderr, saved_stdout_py, saved_stderr_py, stdout_nullfile, stdout_nullfile_py
    saved_state = (saved_stdout, saved_stderr, saved_stdout_py, saved_stderr_py, stdout_nullfile, stdout_nullfile_py)
    # Compare against None instead of relying on truthiness: file descriptor 0
    # is a legal result of os.dup()/os.open() (e.g. when stdin is closed) but
    # is falsy, which would make the state look "unset".
    if any(item is not None for item in saved_state):
        return

    # Push out pending text first so it is not lost or emitted after restore.
    sys.stdout.flush()
    sys.stderr.flush()

    stdout_fd = sys.stdout.fileno()
    stderr_fd = sys.stderr.fileno()

    # Remember the real descriptors and the Python-level stream objects.
    saved_stdout = os.dup(stdout_fd)
    saved_stderr = os.dup(stderr_fd)
    saved_stderr_py = sys.stderr
    saved_stdout_py = sys.stdout

    # One raw descriptor for native writers, one file object for Python code.
    stdout_nullfile = os.open(os.devnull, os.O_WRONLY)
    stdout_nullfile_py = open(os.devnull, 'w')

    # Redirect the OS-level descriptors, then the Python-level streams.
    os.dup2(stdout_nullfile, stdout_fd)
    os.dup2(stdout_nullfile, stderr_fd)
    sys.stderr = sys.stdout = stdout_nullfile_py
|
||||||
|
|
||||||
|
def restore_stdout():
    """Undo ``suppress_stdout()`` and re-attach the original output streams.

    Restores the saved Python stream objects, copies the saved descriptors back
    over the stdout/stderr descriptors, closes the devnull handles and the
    duplicated descriptors, and clears the saved state.

    Does nothing unless every piece of saved state is present.
    """
    global saved_stdout, saved_stderr, saved_stdout_py, saved_stderr_py, stdout_nullfile, stdout_nullfile_py
    saved_state = (saved_stdout, saved_stderr, saved_stdout_py, saved_stderr_py, stdout_nullfile, stdout_nullfile_py)
    # Compare against None instead of relying on truthiness: a saved
    # descriptor equal to 0 is valid but falsy, and a truthiness test would
    # skip the restore and leave output permanently silenced.
    if any(item is None for item in saved_state):
        return

    # Put the Python-level streams back first so fileno() refers to the
    # original stream objects.
    sys.stdout = saved_stdout_py
    sys.stderr = saved_stderr_py

    # Point the OS-level descriptors back at their original targets.
    os.dup2(saved_stdout, sys.stdout.fileno())
    os.dup2(saved_stderr, sys.stderr.fileno())

    # Release the devnull handles and the duplicated descriptors.
    os.close(stdout_nullfile)
    stdout_nullfile_py.close()
    os.close(saved_stdout)
    os.close(saved_stderr)

    # Clear the state so a later suppress_stdout() call can run again.
    saved_stdout = saved_stderr = saved_stdout_py = saved_stderr_py = stdout_nullfile = stdout_nullfile_py = None
|
||||||
|
|
||||||
def get_default_threads():
|
def get_default_threads():
|
||||||
physical_core_limit = 1
|
physical_core_limit = 1
|
||||||
if os.cpu_count()!=None and os.cpu_count()>1:
|
if os.cpu_count()!=None and os.cpu_count()>1:
|
||||||
|
@ -3724,13 +3759,19 @@ def main(launch_args,start_server=True):
|
||||||
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
|
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
|
||||||
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
|
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
|
||||||
|
|
||||||
|
args = launch_args
|
||||||
|
if (args.model_param or args.model) and args.prompt and not args.benchmark:
|
||||||
|
suppress_stdout()
|
||||||
|
|
||||||
|
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
|
||||||
|
# print("Python version: " + sys.version)
|
||||||
|
|
||||||
#perform some basic cleanup of old temporary directories
|
#perform some basic cleanup of old temporary directories
|
||||||
try:
|
try:
|
||||||
delete_old_pyinstaller()
|
delete_old_pyinstaller()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
|
print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
|
||||||
|
|
||||||
args = launch_args
|
|
||||||
if args.unpack:
|
if args.unpack:
|
||||||
unpack_to_dir(args.unpack)
|
unpack_to_dir(args.unpack)
|
||||||
return
|
return
|
||||||
|
@ -4162,13 +4203,17 @@ def main(launch_args,start_server=True):
|
||||||
timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
|
timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
|
||||||
timer_thread.start()
|
timer_thread.start()
|
||||||
|
|
||||||
if args.model_param and args.benchmark:
|
if args.model_param and (args.benchmark or args.prompt):
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
start_server = False
|
start_server = False
|
||||||
save_to_file = (args.benchmark!="stdout" and args.benchmark!="")
|
save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
|
||||||
benchmaxctx = maxctx
|
benchmaxctx = maxctx
|
||||||
benchlen = 100
|
benchlen = 100
|
||||||
benchmodel = sanitize_string(os.path.splitext(os.path.basename(modelname))[0])
|
benchmodel = sanitize_string(os.path.splitext(os.path.basename(modelname))[0])
|
||||||
|
benchprompt = ""
|
||||||
|
if args.prompt:
|
||||||
|
benchprompt = args.prompt
|
||||||
|
if args.benchmark:
|
||||||
if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
|
if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
|
||||||
print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
|
print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
|
||||||
save_to_file = False
|
save_to_file = False
|
||||||
|
@ -4176,13 +4221,17 @@ def main(launch_args,start_server=True):
|
||||||
print(f"\nRunning benchmark (Save to File: {args.benchmark})...")
|
print(f"\nRunning benchmark (Save to File: {args.benchmark})...")
|
||||||
else:
|
else:
|
||||||
print(f"\nRunning benchmark (Not Saved)...")
|
print(f"\nRunning benchmark (Not Saved)...")
|
||||||
|
if benchprompt=="":
|
||||||
benchprompt = "1111111111111111"
|
benchprompt = "1111111111111111"
|
||||||
for i in range(0,14): #generate massive prompt
|
for i in range(0,14): #generate massive prompt
|
||||||
benchprompt += benchprompt
|
benchprompt += benchprompt
|
||||||
genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,ban_eos_token=True)
|
genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,ban_eos_token=True)
|
||||||
result = genout['text']
|
result = genout['text']
|
||||||
result = (result[:5] if len(result)>5 else "")
|
if args.prompt and not args.benchmark:
|
||||||
|
restore_stdout()
|
||||||
|
print(result)
|
||||||
|
if args.benchmark:
|
||||||
|
result = (result[:8] if len(result)>8 else "") if not args.prompt else result
|
||||||
t_pp = float(handle.get_last_process_time())*float(benchmaxctx-benchlen)*0.001
|
t_pp = float(handle.get_last_process_time())*float(benchmaxctx-benchlen)*0.001
|
||||||
t_gen = float(handle.get_last_eval_time())*float(benchlen)*0.001
|
t_gen = float(handle.get_last_eval_time())*float(benchlen)*0.001
|
||||||
s_pp = float(benchmaxctx-benchlen)/t_pp
|
s_pp = float(benchmaxctx-benchlen)/t_pp
|
||||||
|
@ -4228,6 +4277,7 @@ def main(launch_args,start_server=True):
|
||||||
asyncio.run(RunServerMultiThreaded(args.host, args.port))
|
asyncio.run(RunServerMultiThreaded(args.host, args.port))
|
||||||
else:
|
else:
|
||||||
# Flush stdout for previous win32 issue so the client can see output.
|
# Flush stdout for previous win32 issue so the client can see output.
|
||||||
|
if not args.prompt or args.benchmark:
|
||||||
print(f"Server was not started, main function complete. Idling.", flush=True)
|
print(f"Server was not started, main function complete. Idling.", flush=True)
|
||||||
|
|
||||||
def run_in_queue(launch_args, input_queue, output_queue):
|
def run_in_queue(launch_args, input_queue, output_queue):
|
||||||
|
@ -4265,8 +4315,6 @@ if __name__ == '__main__':
|
||||||
return f
|
return f
|
||||||
return range_checker
|
return range_checker
|
||||||
|
|
||||||
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
|
|
||||||
# print("Python version: " + sys.version)
|
|
||||||
parser = argparse.ArgumentParser(description='KoboldCpp Server')
|
parser = argparse.ArgumentParser(description='KoboldCpp Server')
|
||||||
modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
|
modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
|
||||||
modelgroup.add_argument("--model", metavar=('[filename]'), help="Model file to load", type=str, default="")
|
modelgroup.add_argument("--model", metavar=('[filename]'), help="Model file to load", type=str, default="")
|
||||||
|
@ -4302,6 +4350,7 @@ if __name__ == '__main__':
|
||||||
advparser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')
|
advparser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')
|
||||||
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
|
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
|
||||||
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
|
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
|
||||||
|
advparser.add_argument("--prompt", metavar=('[prompt]'), help="Passing a prompt string triggers a direct inference, loading the model, outputs the response to stdout and exits. Can be used alone or with benchmark.", type=str, default="")
|
||||||
advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
|
advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
|
||||||
advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true')
|
advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true')
|
||||||
advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
|
advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue