mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
embed aria2c for windows, add slowness check with highpriority recommendation (+1 squashed commits)
Squashed commits: [b9b695217] embed aria2c for windows, add slowness check with highpriority recommendation (+1 squashed commits) Squashed commits: [90b5d389d] embed aria2c for windows, add slowness check with highpriority recommendation (+1 squashed commits) Squashed commits: [fbbaa989f] embed aria2c for windows
This commit is contained in:
parent
9981ba8427
commit
13cee48740
4 changed files with 29 additions and 57 deletions
BIN
aria2c-win.exe
Normal file
BIN
aria2c-win.exe
Normal file
Binary file not shown.
1
expose.h
1
expose.h
|
@ -69,6 +69,7 @@ struct load_model_inputs
|
||||||
const float tensor_split[tensor_split_max] = {};
|
const float tensor_split[tensor_split_max] = {};
|
||||||
const int quant_k = 0;
|
const int quant_k = 0;
|
||||||
const int quant_v = 0;
|
const int quant_v = 0;
|
||||||
|
const bool check_slowness = false;
|
||||||
const bool quiet = false;
|
const bool quiet = false;
|
||||||
const int debugmode = 0;
|
const int debugmode = 0;
|
||||||
};
|
};
|
||||||
|
|
|
@ -136,6 +136,7 @@ static std::string concat_output_reader_copy_res = ""; //for gen response
|
||||||
static std::vector<logit_bias> logit_biases;
|
static std::vector<logit_bias> logit_biases;
|
||||||
static bool add_bos_token = true; // if set to false, mmproj handling breaks. dont disable unless you know what you're doing
|
static bool add_bos_token = true; // if set to false, mmproj handling breaks. dont disable unless you know what you're doing
|
||||||
static bool load_guidance = false; //whether to enable cfg for negative prompts
|
static bool load_guidance = false; //whether to enable cfg for negative prompts
|
||||||
|
static bool check_slowness = false; //will display a suggestion to use highpriority if slow
|
||||||
|
|
||||||
static int delayed_generated_tokens_limit = 0;
|
static int delayed_generated_tokens_limit = 0;
|
||||||
std::deque<std::string> delayed_generated_tokens; //for use with antislop sampling
|
std::deque<std::string> delayed_generated_tokens; //for use with antislop sampling
|
||||||
|
@ -1927,6 +1928,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
max_context_limit_at_load = clamped_max_context_length;
|
max_context_limit_at_load = clamped_max_context_length;
|
||||||
add_bos_token = !inputs.no_bos_token;
|
add_bos_token = !inputs.no_bos_token;
|
||||||
load_guidance = inputs.load_guidance;
|
load_guidance = inputs.load_guidance;
|
||||||
|
check_slowness = inputs.check_slowness;
|
||||||
|
|
||||||
if(!add_bos_token)
|
if(!add_bos_token)
|
||||||
{
|
{
|
||||||
|
@ -4164,6 +4166,14 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
{
|
{
|
||||||
printf("\n(Draft Results - Success:%d, Failure:%d)",draft_successes,draft_failures);
|
printf("\n(Draft Results - Success:%d, Failure:%d)",draft_successes,draft_failures);
|
||||||
}
|
}
|
||||||
|
if(check_slowness && ts2<2.0f)
|
||||||
|
{
|
||||||
|
check_slowness = false;
|
||||||
|
if(!is_quiet)
|
||||||
|
{
|
||||||
|
printf("\n======\nNote: Your generation speed appears rather slow. You can try relaunching KoboldCpp with the high priority toggle (or --highpriority) to see if it helps.\n======\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
output.status = 1;
|
output.status = 1;
|
||||||
int finaltokcount = (int)current_context_tokens.size()-realnpredict;
|
int finaltokcount = (int)current_context_tokens.size()-realnpredict;
|
||||||
|
|
75
koboldcpp.py
75
koboldcpp.py
|
@ -190,6 +190,7 @@ class load_model_inputs(ctypes.Structure):
|
||||||
("tensor_split", ctypes.c_float * tensor_split_max),
|
("tensor_split", ctypes.c_float * tensor_split_max),
|
||||||
("quant_k", ctypes.c_int),
|
("quant_k", ctypes.c_int),
|
||||||
("quant_v", ctypes.c_int),
|
("quant_v", ctypes.c_int),
|
||||||
|
("check_slowness", ctypes.c_bool),
|
||||||
("quiet", ctypes.c_bool),
|
("quiet", ctypes.c_bool),
|
||||||
("debugmode", ctypes.c_int)]
|
("debugmode", ctypes.c_int)]
|
||||||
|
|
||||||
|
@ -1234,6 +1235,7 @@ def load_model(model_filename):
|
||||||
inputs.load_guidance = args.enableguidance
|
inputs.load_guidance = args.enableguidance
|
||||||
inputs.override_kv = args.overridekv.encode("UTF-8") if args.overridekv else "".encode("UTF-8")
|
inputs.override_kv = args.overridekv.encode("UTF-8") if args.overridekv else "".encode("UTF-8")
|
||||||
inputs.override_tensors = args.overridetensors.encode("UTF-8") if args.overridetensors else "".encode("UTF-8")
|
inputs.override_tensors = args.overridetensors.encode("UTF-8") if args.overridetensors else "".encode("UTF-8")
|
||||||
|
inputs.check_slowness = (not args.highpriority and os.name == 'nt' and 'Intel' in platform.processor())
|
||||||
inputs = set_backend_props(inputs)
|
inputs = set_backend_props(inputs)
|
||||||
ret = handle.load_model(inputs)
|
ret = handle.load_model(inputs)
|
||||||
return ret
|
return ret
|
||||||
|
@ -5577,60 +5579,6 @@ def setuptunnel(global_memory, has_sd):
|
||||||
print(str(ex))
|
print(str(ex))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def unload_libs():
|
|
||||||
global handle
|
|
||||||
OS = platform.system()
|
|
||||||
dll_close = None
|
|
||||||
if OS == "Windows": # pragma: Windows
|
|
||||||
from ctypes import wintypes
|
|
||||||
dll_close = ctypes.windll.kernel32.FreeLibrary
|
|
||||||
dll_close.argtypes = [wintypes.HMODULE]
|
|
||||||
dll_close.restype = ctypes.c_int
|
|
||||||
elif OS == "Darwin":
|
|
||||||
try:
|
|
||||||
try: # macOS 11 (Big Sur). Possibly also later macOS 10s.
|
|
||||||
stdlib = ctypes.CDLL("libc.dylib")
|
|
||||||
except OSError:
|
|
||||||
stdlib = ctypes.CDLL("libSystem")
|
|
||||||
except OSError:
|
|
||||||
# Older macOSs. Not only is the name inconsistent but it's
|
|
||||||
# not even in PATH.
|
|
||||||
stdlib = ctypes.CDLL("/usr/lib/system/libsystem_c.dylib")
|
|
||||||
dll_close = stdlib.dlclose
|
|
||||||
dll_close.argtypes = [ctypes.c_void_p]
|
|
||||||
dll_close.restype = ctypes.c_int
|
|
||||||
elif OS == "Linux":
|
|
||||||
try:
|
|
||||||
stdlib = ctypes.CDLL("")
|
|
||||||
except OSError:
|
|
||||||
stdlib = ctypes.CDLL("libc.so") # Alpine Linux.
|
|
||||||
dll_close = stdlib.dlclose
|
|
||||||
dll_close.argtypes = [ctypes.c_void_p]
|
|
||||||
dll_close.restype = ctypes.c_int
|
|
||||||
elif sys.platform == "msys":
|
|
||||||
# msys can also use `ctypes.CDLL("kernel32.dll").FreeLibrary()`.
|
|
||||||
stdlib = ctypes.CDLL("msys-2.0.dll")
|
|
||||||
dll_close = stdlib.dlclose
|
|
||||||
dll_close.argtypes = [ctypes.c_void_p]
|
|
||||||
dll_close.restype = ctypes.c_int
|
|
||||||
elif sys.platform == "cygwin":
|
|
||||||
stdlib = ctypes.CDLL("cygwin1.dll")
|
|
||||||
dll_close = stdlib.dlclose
|
|
||||||
dll_close.argtypes = [ctypes.c_void_p]
|
|
||||||
dll_close.restype = ctypes.c_int
|
|
||||||
elif OS == "FreeBSD":
|
|
||||||
# FreeBSD uses `/usr/lib/libc.so.7` where `7` is another version number.
|
|
||||||
# It is not in PATH but using its name instead of its path is somehow the
|
|
||||||
# only way to open it. The name must include the .so.7 suffix.
|
|
||||||
stdlib = ctypes.CDLL("libc.so.7")
|
|
||||||
dll_close = stdlib.close
|
|
||||||
|
|
||||||
if handle and dll_close:
|
|
||||||
print("Unloading Libraries...")
|
|
||||||
dll_close(handle._handle)
|
|
||||||
del handle
|
|
||||||
handle = None
|
|
||||||
|
|
||||||
def reload_from_new_args(newargs):
|
def reload_from_new_args(newargs):
|
||||||
try:
|
try:
|
||||||
args.istemplate = False
|
args.istemplate = False
|
||||||
|
@ -5766,10 +5714,23 @@ def downloader_internal(input_url, output_filename, capture_output, min_file_siz
|
||||||
dl_success = False
|
dl_success = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if shutil.which("aria2c") is not None:
|
if os.name == 'nt':
|
||||||
|
basepath = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
a2cexe = (os.path.join(basepath, "aria2c-win.exe"))
|
||||||
|
if os.path.exists(a2cexe): #on windows try using embedded a2cexe
|
||||||
|
rc = subprocess.run([
|
||||||
|
a2cexe, "-x", "16", "-s", "16", "--summary-interval=30", "--console-log-level=error", "--log-level=error",
|
||||||
|
"--download-result=default", "--allow-overwrite=true", "--file-allocation=none", "--max-tries=3", "-o", output_filename, input_url
|
||||||
|
], capture_output=capture_output, text=True, check=True, encoding='utf-8')
|
||||||
|
dl_success = (rc.returncode == 0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"aria2c-win failed: {e}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
if not dl_success and shutil.which("aria2c") is not None:
|
||||||
rc = subprocess.run([
|
rc = subprocess.run([
|
||||||
"aria2c", "-x", "16", "-s", "16", "--summary-interval=30", "--console-log-level=error", "--log-level=error",
|
"aria2c", "-x", "16", "-s", "16", "--summary-interval=30", "--console-log-level=error", "--log-level=error",
|
||||||
"--download-result=default", "--allow-overwrite=true", "--file-allocation=none", "-o", output_filename, input_url
|
"--download-result=default", "--allow-overwrite=true", "--file-allocation=none", "--max-tries=3", "-o", output_filename, input_url
|
||||||
], capture_output=capture_output, text=True, check=True, encoding='utf-8')
|
], capture_output=capture_output, text=True, check=True, encoding='utf-8')
|
||||||
dl_success = (rc.returncode == 0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
|
dl_success = (rc.returncode == 0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
|
@ -6226,7 +6187,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
||||||
os_used = sys.platform
|
os_used = sys.platform
|
||||||
process = psutil.Process(os.getpid()) # Set high priority for the python script for the CPU
|
process = psutil.Process(os.getpid()) # Set high priority for the python script for the CPU
|
||||||
oldprio = process.nice()
|
oldprio = process.nice()
|
||||||
if os_used == "win32": # Windows (either 32-bit or 64-bit)
|
if os.name == 'nt': # Windows (either 32-bit or 64-bit)
|
||||||
process.nice(psutil.REALTIME_PRIORITY_CLASS)
|
process.nice(psutil.REALTIME_PRIORITY_CLASS)
|
||||||
print("High Priority for Windows Set: " + str(oldprio) + " to " + str(process.nice()))
|
print("High Priority for Windows Set: " + str(oldprio) + " to " + str(process.nice()))
|
||||||
elif os_used == "linux": # linux
|
elif os_used == "linux": # linux
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue