diff --git a/aria2c-win.exe b/aria2c-win.exe new file mode 100644 index 000000000..5004e1034 Binary files /dev/null and b/aria2c-win.exe differ diff --git a/expose.h b/expose.h index 5f48fe6cf..daea7472a 100644 --- a/expose.h +++ b/expose.h @@ -69,6 +69,7 @@ struct load_model_inputs const float tensor_split[tensor_split_max] = {}; const int quant_k = 0; const int quant_v = 0; + const bool check_slowness = false; const bool quiet = false; const int debugmode = 0; }; diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 35b774b57..bcb113968 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -136,6 +136,7 @@ static std::string concat_output_reader_copy_res = ""; //for gen response static std::vector logit_biases; static bool add_bos_token = true; // if set to false, mmproj handling breaks. dont disable unless you know what you're doing static bool load_guidance = false; //whether to enable cfg for negative prompts +static bool check_slowness = false; //will display a suggestion to use highpriority if slow static int delayed_generated_tokens_limit = 0; std::deque delayed_generated_tokens; //for use with antislop sampling @@ -1927,6 +1928,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in max_context_limit_at_load = clamped_max_context_length; add_bos_token = !inputs.no_bos_token; load_guidance = inputs.load_guidance; + check_slowness = inputs.check_slowness; if(!add_bos_token) { @@ -4164,6 +4166,14 @@ generation_outputs gpttype_generate(const generation_inputs inputs) { printf("\n(Draft Results - Success:%d, Failure:%d)",draft_successes,draft_failures); } + if(check_slowness && ts2<2.0f) + { + check_slowness = false; + if(!is_quiet) + { + printf("\n======\nNote: Your generation speed appears rather slow. You can try relaunching KoboldCpp with the high priority toggle (or --highpriority) to see if it helps.\n======\n"); + } + } fflush(stdout); output.status = 1; int finaltokcount = (int)current_context_tokens.size()-realnpredict; diff --git a/koboldcpp.py b/koboldcpp.py index f36fb22f0..f771f4592 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -190,6 +190,7 @@ class load_model_inputs(ctypes.Structure): ("tensor_split", ctypes.c_float * tensor_split_max), ("quant_k", ctypes.c_int), ("quant_v", ctypes.c_int), + ("check_slowness", ctypes.c_bool), ("quiet", ctypes.c_bool), ("debugmode", ctypes.c_int)] @@ -1234,6 +1235,7 @@ def load_model(model_filename): inputs.load_guidance = args.enableguidance inputs.override_kv = args.overridekv.encode("UTF-8") if args.overridekv else "".encode("UTF-8") inputs.override_tensors = args.overridetensors.encode("UTF-8") if args.overridetensors else "".encode("UTF-8") + inputs.check_slowness = (not args.highpriority and os.name == 'nt' and 'Intel' in platform.processor()) inputs = set_backend_props(inputs) ret = handle.load_model(inputs) return ret @@ -5577,60 +5579,6 @@ def setuptunnel(global_memory, has_sd): print(str(ex)) return None -def unload_libs(): - global handle - OS = platform.system() - dll_close = None - if OS == "Windows": # pragma: Windows - from ctypes import wintypes - dll_close = ctypes.windll.kernel32.FreeLibrary - dll_close.argtypes = [wintypes.HMODULE] - dll_close.restype = ctypes.c_int - elif OS == "Darwin": - try: - try: # macOS 11 (Big Sur). Possibly also later macOS 10s. - stdlib = ctypes.CDLL("libc.dylib") - except OSError: - stdlib = ctypes.CDLL("libSystem") - except OSError: - # Older macOSs. Not only is the name inconsistent but it's - # not even in PATH. - stdlib = ctypes.CDLL("/usr/lib/system/libsystem_c.dylib") - dll_close = stdlib.dlclose - dll_close.argtypes = [ctypes.c_void_p] - dll_close.restype = ctypes.c_int - elif OS == "Linux": - try: - stdlib = ctypes.CDLL("") - except OSError: - stdlib = ctypes.CDLL("libc.so") # Alpine Linux. - dll_close = stdlib.dlclose - dll_close.argtypes = [ctypes.c_void_p] - dll_close.restype = ctypes.c_int - elif sys.platform == "msys": - # msys can also use `ctypes.CDLL("kernel32.dll").FreeLibrary()`. - stdlib = ctypes.CDLL("msys-2.0.dll") - dll_close = stdlib.dlclose - dll_close.argtypes = [ctypes.c_void_p] - dll_close.restype = ctypes.c_int - elif sys.platform == "cygwin": - stdlib = ctypes.CDLL("cygwin1.dll") - dll_close = stdlib.dlclose - dll_close.argtypes = [ctypes.c_void_p] - dll_close.restype = ctypes.c_int - elif OS == "FreeBSD": - # FreeBSD uses `/usr/lib/libc.so.7` where `7` is another version number. - # It is not in PATH but using its name instead of its path is somehow the - # only way to open it. The name must include the .so.7 suffix. - stdlib = ctypes.CDLL("libc.so.7") - dll_close = stdlib.close - - if handle and dll_close: - print("Unloading Libraries...") - dll_close(handle._handle) - del handle - handle = None - def reload_from_new_args(newargs): try: args.istemplate = False @@ -5766,10 +5714,23 @@ def downloader_internal(input_url, output_filename, capture_output, min_file_siz dl_success = False try: - if shutil.which("aria2c") is not None: + if os.name == 'nt': + basepath = os.path.abspath(os.path.dirname(__file__)) + a2cexe = (os.path.join(basepath, "aria2c-win.exe")) + if os.path.exists(a2cexe): #on windows try using embedded a2cexe + rc = subprocess.run([ + a2cexe, "-x", "16", "-s", "16", "--summary-interval=30", "--console-log-level=error", "--log-level=error", + "--download-result=default", "--allow-overwrite=true", "--file-allocation=none", "--max-tries=3", "-o", output_filename, input_url + ], capture_output=capture_output, text=True, check=True, encoding='utf-8') + dl_success = (rc.returncode == 0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size) + except subprocess.CalledProcessError as e: + print(f"aria2c-win failed: {e}") + + try: + if not dl_success and shutil.which("aria2c") is not None: rc = subprocess.run([ "aria2c", "-x", "16", "-s", "16", "--summary-interval=30", "--console-log-level=error", "--log-level=error", - "--download-result=default", "--allow-overwrite=true", "--file-allocation=none", "-o", output_filename, input_url + "--download-result=default", "--allow-overwrite=true", "--file-allocation=none", "--max-tries=3", "-o", output_filename, input_url ], capture_output=capture_output, text=True, check=True, encoding='utf-8') dl_success = (rc.returncode == 0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size) except subprocess.CalledProcessError as e: @@ -6226,7 +6187,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False): os_used = sys.platform process = psutil.Process(os.getpid()) # Set high priority for the python script for the CPU oldprio = process.nice() - if os_used == "win32": # Windows (either 32-bit or 64-bit) + if os.name == 'nt': # Windows (either 32-bit or 64-bit) process.nice(psutil.REALTIME_PRIORITY_CLASS) print("High Priority for Windows Set: " + str(oldprio) + " to " + str(process.nice())) elif os_used == "linux": # linux