wip prompt

2025-09-11 01:24:36 +00:00 · 2024-08-06 21:54:08 +08:00 · 2024-08-06 21:54:08 +08:00 · 853d57c53c
commit 853d57c53c
parent c23d91987a
1 changed files with 101 additions and 52 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -74,6 +74,13 @@ currfinishreason = "null"
 using_gui_launcher = False
 using_outdated_flags = False

+saved_stdout = None  # duplicate of the stdout file descriptor, filled in by suppress_stdout()
+saved_stderr = None  # duplicate of the stderr file descriptor, filled in by suppress_stdout()
+saved_stdout_py = None  # the sys.stdout object that was active before suppression
+saved_stderr_py = None  # the sys.stderr object that was active before suppression
+stdout_nullfile = None  # OS-level descriptor opened on os.devnull while suppressed
+stdout_nullfile_py = None  # Python file object opened on os.devnull while suppressed
+
 CLDevices = ["1","2","3","4"]
 CUDevices = ["1","2","3","4","All"]
 CLDevicesNames = ["","","",""]
@ -224,6 +231,34 @@ def getabspath():
 def file_exists(filename):
    return os.path.exists(os.path.join(getdirpath(), filename))

+def suppress_stdout():  # Redirect stdout and stderr to os.devnull, at both the file-descriptor and the Python-object level.
+    global saved_stdout, saved_stderr, saved_stdout_py, saved_stderr_py, stdout_nullfile, stdout_nullfile_py
+    if not saved_stdout and not saved_stderr and not saved_stdout_py and not saved_stderr_py and not stdout_nullfile and not stdout_nullfile_py:  # act only when no saved state exists. NOTE(review): truthiness test, so a descriptor value of 0 would count as "unset" - confirm that cannot happen
+        sys.stdout.flush()  # push out buffered Python-level output before the descriptors are swapped
+        sys.stderr.flush()
+        saved_stdout = os.dup(sys.stdout.fileno())  # keep a copy of the original descriptor for restore_stdout()
+        saved_stderr = os.dup(sys.stderr.fileno())
+        saved_stderr_py = sys.stderr  # remember the original Python stream objects
+        saved_stdout_py = sys.stdout
+        stdout_nullfile = os.open(os.devnull, os.O_WRONLY)  # write-only descriptor on the null device
+        stdout_nullfile_py = open(os.devnull, 'w')  # separate Python-level file object on the null device
+        os.dup2(stdout_nullfile, sys.stdout.fileno())  # descriptor-level redirect: the stdout fd now refers to os.devnull
+        os.dup2(stdout_nullfile, sys.stderr.fileno())  # same for the stderr fd
+        sys.stderr = sys.stdout = stdout_nullfile_py  # Python-level redirect: both streams now share the devnull file object
+
+def restore_stdout():  # Undo suppress_stdout(): reinstate the original streams and descriptors, then release the temporary ones.
+    global saved_stdout, saved_stderr, saved_stdout_py, saved_stderr_py, stdout_nullfile, stdout_nullfile_py
+    if saved_stdout and saved_stderr and saved_stdout_py and saved_stderr_py and stdout_nullfile and stdout_nullfile_py:  # act only when every piece of saved state is present. NOTE(review): truthiness test, so a saved descriptor equal to 0 would make this a silent no-op - confirm
+        sys.stdout = saved_stdout_py  # put the original stream objects back first so the fileno() calls below resolve on them
+        sys.stderr = saved_stderr_py
+        os.dup2(saved_stdout, sys.stdout.fileno())  # point the stdout descriptor back at the saved duplicate
+        os.dup2(saved_stderr, sys.stderr.fileno())  # point the stderr descriptor back at the saved duplicate
+        os.close(stdout_nullfile)  # release the devnull descriptor
+        stdout_nullfile_py.close()  # release the devnull file object
+        os.close(saved_stdout)  # the duplicates are no longer needed once dup2 has copied them back
+        os.close(saved_stderr)
+        saved_stdout = saved_stderr = saved_stdout_py = saved_stderr_py = stdout_nullfile = stdout_nullfile_py = None  # clear all state so the guard in suppress_stdout() passes again
+
 def get_default_threads():
    physical_core_limit = 1
    if os.cpu_count()!=None and os.cpu_count()>1:
@ -3724,13 +3759,19 @@ def main(launch_args,start_server=True):
    global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
    global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath

+    args = launch_args
+    if (args.model_param or args.model) and args.prompt and not args.benchmark:
+        suppress_stdout()
+
+    print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
+    # print("Python version: " + sys.version)
+
    #perform some basic cleanup of old temporary directories
    try:
        delete_old_pyinstaller()
    except Exception as e:
        print(f"Error cleaning up orphaned pyinstaller dirs: {e}")

-    args = launch_args
    if args.unpack:
        unpack_to_dir(args.unpack)
        return
@ -4162,13 +4203,17 @@ def main(launch_args,start_server=True):
        timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
        timer_thread.start()

-    if args.model_param and args.benchmark:
+    if args.model_param and (args.benchmark or args.prompt):
        from datetime import datetime, timezone
        start_server = False
-        save_to_file = (args.benchmark!="stdout" and args.benchmark!="")
+        save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
        benchmaxctx = maxctx
        benchlen = 100
        benchmodel = sanitize_string(os.path.splitext(os.path.basename(modelname))[0])
+        benchprompt = ""
+        if args.prompt:
+            benchprompt = args.prompt
+        if args.benchmark:
            if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
                print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
                save_to_file = False
@ -4176,13 +4221,17 @@ def main(launch_args,start_server=True):
                print(f"\nRunning benchmark (Save to File: {args.benchmark})...")
            else:
                print(f"\nRunning benchmark (Not Saved)...")
-
+            if benchprompt=="":
                benchprompt = "1111111111111111"
                for i in range(0,14): #generate massive prompt
                    benchprompt += benchprompt
        genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,ban_eos_token=True)
        result = genout['text']
-        result = (result[:5] if len(result)>5 else "")
+        if args.prompt and not args.benchmark:
+            restore_stdout()
+            print(result)
+        if args.benchmark:
+            result = (result[:8] if len(result)>8 else "") if not args.prompt else result
            t_pp = float(handle.get_last_process_time())*float(benchmaxctx-benchlen)*0.001
            t_gen = float(handle.get_last_eval_time())*float(benchlen)*0.001
            s_pp = float(benchmaxctx-benchlen)/t_pp
@ -4228,6 +4277,7 @@ def main(launch_args,start_server=True):
        asyncio.run(RunServerMultiThreaded(args.host, args.port))
    else:
        # Flush stdout for previous win32 issue so the client can see output.
+        if not args.prompt or args.benchmark:
            print(f"Server was not started, main function complete. Idling.", flush=True)

 def run_in_queue(launch_args, input_queue, output_queue):
@ -4265,8 +4315,6 @@ if __name__ == '__main__':
            return f
        return range_checker

-    print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
-    # print("Python version: " + sys.version)
    parser = argparse.ArgumentParser(description='KoboldCpp Server')
    modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
    modelgroup.add_argument("--model", metavar=('[filename]'), help="Model file to load", type=str, default="")
@ -4302,6 +4350,7 @@ if __name__ == '__main__':
    advparser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')
    advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
    advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
+    advparser.add_argument("--prompt", metavar=('[prompt]'), help="Passing a prompt string triggers a direct inference, loading the model, outputs the response to stdout and exits. Can be used alone or with benchmark.", type=str, default="")
    advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
    advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true')
    advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')