wip prompt

This commit is contained in:
Concedo 2024-08-06 21:54:08 +08:00
parent c23d91987a
commit 853d57c53c

View file

@ -74,6 +74,13 @@ currfinishreason = "null"
using_gui_launcher = False
using_outdated_flags = False
saved_stdout = None
saved_stderr = None
saved_stdout_py = None
saved_stderr_py = None
stdout_nullfile = None
stdout_nullfile_py = None
CLDevices = ["1","2","3","4"]
CUDevices = ["1","2","3","4","All"]
CLDevicesNames = ["","","",""]
@ -224,6 +231,34 @@ def getabspath():
def file_exists(filename):
return os.path.exists(os.path.join(getdirpath(), filename))
def suppress_stdout():
global saved_stdout, saved_stderr, saved_stdout_py, saved_stderr_py, stdout_nullfile, stdout_nullfile_py
if not saved_stdout and not saved_stderr and not saved_stdout_py and not saved_stderr_py and not stdout_nullfile and not stdout_nullfile_py:
sys.stdout.flush()
sys.stderr.flush()
saved_stdout = os.dup(sys.stdout.fileno())
saved_stderr = os.dup(sys.stderr.fileno())
saved_stderr_py = sys.stderr
saved_stdout_py = sys.stdout
stdout_nullfile = os.open(os.devnull, os.O_WRONLY)
stdout_nullfile_py = open(os.devnull, 'w')
os.dup2(stdout_nullfile, sys.stdout.fileno())
os.dup2(stdout_nullfile, sys.stderr.fileno())
sys.stderr = sys.stdout = stdout_nullfile_py
def restore_stdout():
global saved_stdout, saved_stderr, saved_stdout_py, saved_stderr_py, stdout_nullfile, stdout_nullfile_py
if saved_stdout and saved_stderr and saved_stdout_py and saved_stderr_py and stdout_nullfile and stdout_nullfile_py:
sys.stdout = saved_stdout_py
sys.stderr = saved_stderr_py
os.dup2(saved_stdout, sys.stdout.fileno())
os.dup2(saved_stderr, sys.stderr.fileno())
os.close(stdout_nullfile)
stdout_nullfile_py.close()
os.close(saved_stdout)
os.close(saved_stderr)
saved_stdout = saved_stderr = saved_stdout_py = saved_stderr_py = stdout_nullfile = stdout_nullfile_py = None
def get_default_threads():
physical_core_limit = 1
if os.cpu_count()!=None and os.cpu_count()>1:
@ -3724,13 +3759,19 @@ def main(launch_args,start_server=True):
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
args = launch_args
if (args.model_param or args.model) and args.prompt and not args.benchmark:
suppress_stdout()
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
# print("Python version: " + sys.version)
#perform some basic cleanup of old temporary directories
try:
delete_old_pyinstaller()
except Exception as e:
print(f"Error cleaning up orphaned pyinstaller dirs: {e}")
args = launch_args
if args.unpack:
unpack_to_dir(args.unpack)
return
@ -4162,13 +4203,17 @@ def main(launch_args,start_server=True):
timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
timer_thread.start()
if args.model_param and args.benchmark:
if args.model_param and (args.benchmark or args.prompt):
from datetime import datetime, timezone
start_server = False
save_to_file = (args.benchmark!="stdout" and args.benchmark!="")
save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
benchmaxctx = maxctx
benchlen = 100
benchmodel = sanitize_string(os.path.splitext(os.path.basename(modelname))[0])
benchprompt = ""
if args.prompt:
benchprompt = args.prompt
if args.benchmark:
if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
save_to_file = False
@ -4176,13 +4221,17 @@ def main(launch_args,start_server=True):
print(f"\nRunning benchmark (Save to File: {args.benchmark})...")
else:
print(f"\nRunning benchmark (Not Saved)...")
if benchprompt=="":
benchprompt = "1111111111111111"
for i in range(0,14): #generate massive prompt
benchprompt += benchprompt
genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,ban_eos_token=True)
result = genout['text']
result = (result[:5] if len(result)>5 else "")
if args.prompt and not args.benchmark:
restore_stdout()
print(result)
if args.benchmark:
result = (result[:8] if len(result)>8 else "") if not args.prompt else result
t_pp = float(handle.get_last_process_time())*float(benchmaxctx-benchlen)*0.001
t_gen = float(handle.get_last_eval_time())*float(benchlen)*0.001
s_pp = float(benchmaxctx-benchlen)/t_pp
@ -4228,6 +4277,7 @@ def main(launch_args,start_server=True):
asyncio.run(RunServerMultiThreaded(args.host, args.port))
else:
# Flush stdout for previous win32 issue so the client can see output.
if not args.prompt or args.benchmark:
print(f"Server was not started, main function complete. Idling.", flush=True)
def run_in_queue(launch_args, input_queue, output_queue):
@ -4265,8 +4315,6 @@ if __name__ == '__main__':
return f
return range_checker
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
# print("Python version: " + sys.version)
parser = argparse.ArgumentParser(description='KoboldCpp Server')
modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
modelgroup.add_argument("--model", metavar=('[filename]'), help="Model file to load", type=str, default="")
@ -4302,6 +4350,7 @@ if __name__ == '__main__':
advparser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
advparser.add_argument("--prompt", metavar=('[prompt]'), help="Passing a prompt string triggers a direct inference, loading the model, outputs the response to stdout and exits. Can be used alone or with benchmark.", type=str, default="")
advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true')
advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')