more linting with Ruff (+1 squashed commits)

Squashed commits:

[43802cfe2] Applied default Ruff linting
Concedo 2024-12-01 00:56:39 +08:00
parent 409e393d10
commit b7cd210cd2
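
The diff below applies Ruff's default rule set (the E4/E7/E9 and F groups). The recurring fixes: multi-module imports split onto separate lines (E401), comparisons against None and booleans replaced by identity checks (E711/E712), "not (x in y)" rewritten as "x not in y" (E713), the ambiguous loop variable "l" renamed (E741), bare "except:" clauses widened to "except Exception:" (E722), unused "except Exception as e:" bindings dropped (F841), f-strings with no placeholders stripped of the "f" prefix (F541), one duplicated "prompt" key removed from a dict literal, and a stray mid-file "import struct" hoisted to the top of the file. A minimal before/after sketch of these patterns (illustrative snippet, not code from this file):

    # before: tripped by the defaults
    import os, math, re                        # E401: multiple imports on one line
    if val != None and not ("key" in data):    # E711 / E713
        print(f"ready")                        # F541: f-string without placeholders
    try:
        run()
    except:                                    # E722: bare except
        pass

    # after: the style this commit converges on
    import os
    import math
    import re
    if val is not None and "key" not in data:
        print("ready")
    try:
        run()
    except Exception:
        pass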


@@ -9,11 +9,20 @@
 # scenarios and everything Kobold and KoboldAI Lite have to offer.
 import ctypes
-import os, math, re
+import os
+import math
+import re
 import argparse
 import platform
 import base64
-import json, sys, http.server, time, asyncio, socket, threading
+import struct
+import json
+import sys
+import http.server
+import time
+import asyncio
+import socket
+import threading
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime, timezone
@@ -298,7 +307,7 @@ def restore_stdout():
 
 def get_default_threads():
     physical_core_limit = 1
-    if os.cpu_count()!=None and os.cpu_count()>1:
+    if os.cpu_count() is not None and os.cpu_count()>1:
         physical_core_limit = os.cpu_count() // 2
     default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
     processor = platform.processor()
@@ -521,8 +530,8 @@ def set_backend_props(inputs):
     if args.usevulkan: #is an empty array if using vulkan without defined gpu
         s = ""
-        for l in range(0,len(args.usevulkan)):
-            s += str(args.usevulkan[l])
+        for it in range(0,len(args.usevulkan)):
+            s += str(args.usevulkan[it])
         inputs.vulkan_info = s.encode("UTF-8")
     else:
         inputs.vulkan_info = "".encode("UTF-8")
 
@@ -593,7 +602,7 @@ def unpack_to_dir(destpath = ""):
             messagebox.showerror("Error", f"An error occurred while unpacking: {e}")
     else:
         if cliunpack:
-            print(f"The target folder is not empty or invalid. Please select an empty folder.")
+            print("The target folder is not empty or invalid. Please select an empty folder.")
         else:
             messagebox.showwarning("Invalid Selection", "The target folder is not empty or invalid. Please select an empty folder.")
 
@@ -647,8 +656,6 @@ def string_contains_or_overlaps_sequence_substring(inputstr, sequences):
                 return True
     return False
 
-import struct
-
 def read_gguf_metadata(file_path):
     chunk_size = 8192 # read only first 8kb of file
     try:
@@ -681,7 +688,7 @@ def read_gguf_metadata(file_path):
         key_length = read_gguf_key(b'.attention.key_length',data,8192)
         val_length = read_gguf_key(b'.attention.value_length',data,8192)
         return [layercount,head_count_kv, max(key_length,val_length)]
-    except Exception as ex:
+    except Exception:
         return None
 
 def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath):
@@ -705,7 +712,7 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,
         if fsize>10000000: #dont bother with models < 10mb as they are probably bad
             ggufmeta = read_gguf_metadata(filepath)
             modelfile_extracted_meta = [ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize] #extract done. note that meta may be null
-    except Exception as ex:
+    except Exception:
         modelfile_extracted_meta = None
 
def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how many layers to use
@@ -757,7 +764,7 @@ def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how man
             layerlimit = min(int(ratio*layers), (layers + 3))
         layerlimit = (0 if layerlimit<=2 else layerlimit)
         return layerlimit
-    except Exception as ex:
+    except Exception:
         return 0
 
 def fetch_gpu_properties(testCL,testCU,testVK):
@@ -773,7 +780,7 @@ def fetch_gpu_properties(testCL,testCU,testVK):
             FetchedCUdevices = [line.split(",")[0].strip() for line in output.splitlines()]
             FetchedCUdeviceMem = [line.split(",")[1].strip().split(" ")[0].strip() for line in output.splitlines()]
             FetchedCUfreeMem = [line.split(",")[2].strip().split(" ")[0].strip() for line in output.splitlines()]
-        except Exception as e:
+        except Exception:
             pass
         if len(FetchedCUdevices)==0:
             try: # Get AMD ROCm GPU names
@@ -781,16 +788,18 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                 device_name = None
                 for line in output.splitlines(): # read through the output line by line
                     line = line.strip()
-                    if line.startswith("Marketing Name:"): device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name
+                    if line.startswith("Marketing Name:"):
+                        device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name
                     elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: # if the following Device Type is a GPU (not a CPU) then add it to devices list
                         FetchedCUdevices.append(device_name)
                         AMDgpu = True
-                    elif line.startswith("Device Type:") and "GPU" not in line: device_name = None
+                    elif line.startswith("Device Type:") and "GPU" not in line:
+                        device_name = None
                 if FetchedCUdevices:
                     getamdvram = subprocess.run(['rocm-smi', '--showmeminfo', 'vram', '--csv'], capture_output=True, text=True, check=True, encoding='utf-8').stdout # fetch VRAM of devices
                     if getamdvram:
                         FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
-            except Exception as e:
+            except Exception:
                 pass
         lowestcumem = 0
         lowestfreecumem = 0
@@ -823,7 +832,7 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                 if idx<len(VKIsDGPU):
                     VKIsDGPU[idx] = (1 if dvtype=="PHYSICAL_DEVICE_TYPE_DISCRETE_GPU" else 0)
                     idx += 1
-        except Exception as e:
+        except Exception:
             pass
 
     if testCL:
@@ -834,7 +843,7 @@ def fetch_gpu_properties(testCL,testCU,testVK):
         try:
             output = subprocess.run(["clinfo","--json"], capture_output=True, text=True, check=True, encoding='utf-8').stdout
             data = json.loads(output)
-        except Exception as e1:
+        except Exception:
             output = subprocess.run([((os.path.join(basepath, "winclinfo.exe")) if os.name == 'nt' else "clinfo"),"--json"], capture_output=True, text=True, check=True, creationflags=subprocess.CREATE_NO_WINDOW | subprocess.DETACHED_PROCESS, encoding='utf-8').stdout
             data = json.loads(output)
         plat = 0
@@ -852,7 +861,7 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                 dev += 1
             plat += 1
             MaxMemory[0] = max(lowestclmem,MaxMemory[0])
-        except Exception as e:
+        except Exception:
             pass
 
     return
@@ -1318,12 +1327,12 @@ def extract_json_from_string(input_string):
     try: # First check if model exported perfect json
         parsed_json = json.loads(input_string)
         return parsed_json
-    except Exception as e:
+    except Exception:
         pass
     try: # Next check if all we need is to add brackets to make it perfect json
         parsed_json = json.loads(f"[{input_string}]")
         return parsed_json
-    except Exception as e:
+    except Exception:
         pass
     try:
         # Now use regular expression to match JSON objects or arrays in case part is valid json and part is not
@@ -1333,9 +1342,9 @@ def extract_json_from_string(input_string):
             try:
                 parsed_json = json.loads(potential_json)
                 return parsed_json
-            except Exception as e:
+            except Exception:
                 continue
-    except Exception as e:
+    except Exception:
         pass
     return []
 
@@ -1383,7 +1392,7 @@ def transform_genparams(genparams, api_format):
     rp3 = genparams.get('rep_pen', 1.0)
     rp_max = max(rp1,rp2,rp3)
     genparams["rep_pen"] = rp_max
-    if "use_default_badwordsids" in genparams and not ("ban_eos_token" in genparams):
+    if "use_default_badwordsids" in genparams and "ban_eos_token" not in genparams:
         genparams["ban_eos_token"] = genparams.get('use_default_badwordsids', False)
 
     if api_format==1:
@@ -1451,7 +1460,7 @@ def transform_genparams(genparams, api_format):
             if message['role'] == "user" and message_index == len(messages_array):
                 # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
                 tools_array = genparams.get('tools', [])
-                if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) != None:
+                if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) is not None:
                     response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
                     json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
                     tools_string = json.dumps(tools_array, indent=0)
@@ -1461,7 +1470,7 @@ def transform_genparams(genparams, api_format):
                         try:
                             specified_function = genparams.get('tool_choice').get('function').get('name')
                             json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0)
-                        except Exception as e:
+                        except Exception:
                             # In case of any issues, just revert back to no specified function
                             pass
                     messages_string += json_formatting_instruction
@@ -1671,7 +1680,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         self.wfile.flush()
 
     async def send_kai_sse_event(self, data):
-        self.wfile.write(f'event: message\n'.encode())
+        self.wfile.write('event: message\n'.encode())
         self.wfile.write(f'data: {data}\n\n'.encode())
         self.wfile.flush()
 
@@ -1803,11 +1812,11 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
             auth_header = self.headers['Authorization']
         elif 'authorization' in self.headers:
             auth_header = self.headers['authorization']
-        if auth_header != None and auth_header.startswith('Bearer '):
+        if auth_header is not None and auth_header.startswith('Bearer '):
             token = auth_header[len('Bearer '):].strip()
             if token==password:
                 auth_ok = True
-        if auth_ok==False:
+        if auth_ok is False:
             self.send_response(401)
             self.end_headers(content_type='application/json')
             self.wfile.write(json.dumps({"detail": {
@@ -1847,7 +1856,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
             epurl = f"{httpsaffix}://localhost:{args.port}"
             if args.host!="":
                 epurl = f"{httpsaffix}://{args.host}:{args.port}"
-            gen_payload = {"prompt": prompt,"max_length": max_length,"temperature": temperature,"prompt": prompt,"top_k": top_k,"top_p": top_p,"rep_pen": rep_pen,"ban_eos_token":ban_eos_token}
+            gen_payload = {"prompt": prompt,"max_length": max_length,"temperature": temperature,"top_k": top_k,"top_p": top_p,"rep_pen": rep_pen,"ban_eos_token":ban_eos_token}
             respjson = make_url_request(f'{epurl}/api/v1/generate', gen_payload)
             reply = html.escape(respjson["results"][0]["text"])
             status = "Generation Completed"
@@ -1928,7 +1937,7 @@ Enter Prompt:<br>
                 auth_header = self.headers['Authorization']
             elif 'authorization' in self.headers:
                 auth_header = self.headers['authorization']
-            if auth_header != None and auth_header.startswith('Bearer '):
+            if auth_header is not None and auth_header.startswith('Bearer '):
                 token = auth_header[len('Bearer '):].strip()
                 if token==password:
                     auth_ok = True
@@ -2048,20 +2057,20 @@ Enter Prompt:<br>
         elif self.path=="/api" or self.path=="/docs" or self.path.startswith(('/api/?json=','/api?json=','/docs/?json=','/docs?json=')):
             content_type = 'text/html'
             if embedded_kcpp_docs is None:
-                response_body = (f"KoboldCpp API is running!\n\nAPI usage reference can be found at the wiki: https://github.com/LostRuins/koboldcpp/wiki").encode()
+                response_body = ("KoboldCpp API is running!\n\nAPI usage reference can be found at the wiki: https://github.com/LostRuins/koboldcpp/wiki").encode()
             else:
                 response_body = embedded_kcpp_docs
 
         elif self.path.startswith(("/sdui")):
             content_type = 'text/html'
             if embedded_kcpp_sdui is None:
-                response_body = (f"KoboldCpp API is running, but KCPP SDUI is not loaded").encode()
+                response_body = ("KoboldCpp API is running, but KCPP SDUI is not loaded").encode()
             else:
                 response_body = embedded_kcpp_sdui
 
         elif self.path=="/v1":
             content_type = 'text/html'
-            response_body = (f"KoboldCpp OpenAI compatible endpoint is running!\n\nFor usage reference, see https://platform.openai.com/docs/api-reference").encode()
+            response_body = ("KoboldCpp OpenAI compatible endpoint is running!\n\nFor usage reference, see https://platform.openai.com/docs/api-reference").encode()
 
         elif self.path=="/api/extra/preloadstory":
             if preloaded_story is None:
@@ -2128,7 +2137,7 @@ Enter Prompt:<br>
                     self.rfile.readline()
                     if chunk_length == 0:
                         break
-            except Exception as e:
+            except Exception:
                 self.send_response(500)
                 self.end_headers(content_type='application/json')
                 self.wfile.write(json.dumps({"detail": {
@@ -2177,7 +2186,7 @@ Enter Prompt:<br>
                 tempbody = json.loads(body)
                 if isinstance(tempbody, dict):
                     multiuserkey = tempbody.get('genkey', "")
-            except Exception as e:
+            except Exception:
                 multiuserkey = ""
                 pass
             if (multiuserkey=="" and requestsinqueue==0) or (multiuserkey!="" and multiuserkey==currentusergenkey):
@@ -2200,7 +2209,7 @@ Enter Prompt:<br>
                 tempbody = json.loads(body)
                 if isinstance(tempbody, dict):
                     multiuserkey = tempbody.get('genkey', "")
-            except Exception as e:
+            except Exception:
                 multiuserkey = ""
 
             if totalgens>0:
@@ -2218,7 +2227,7 @@ Enter Prompt:<br>
                 tempbody = json.loads(body)
                 if isinstance(tempbody, dict):
                     multiuserkey = tempbody.get('genkey', "")
-            except Exception as e:
+            except Exception:
                 multiuserkey = ""
 
             if totalgens>0:
@@ -2240,7 +2249,7 @@ Enter Prompt:<br>
                 if isinstance(tempbody, dict):
                     sender = tempbody.get('sender', "")
                     senderbusy = tempbody.get('senderbusy', False)
-            except Exception as e:
+            except Exception:
                 pass
             if sender!="" and senderbusy:
                 multiplayer_lastactive[sender] = int(time.time())
@@ -2380,7 +2389,7 @@ Enter Prompt:<br>
             genparams = None
             try:
                 genparams = json.loads(body)
-            except Exception as e:
+            except Exception:
                 genparams = None
             if is_transcribe: #fallback handling of file uploads
                 b64wav = self.extract_b64string_from_file_upload(body)
@@ -2399,7 +2408,7 @@ Enter Prompt:<br>
             is_quiet = args.quiet
             if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1:
-                utfprint(f"\nInput: " + json.dumps(genparams))
+                utfprint("\nInput: " + json.dumps(genparams))
 
             if args.foreground:
                 bring_terminal_to_foreground()
 
@@ -2497,7 +2506,7 @@ def is_port_in_use(portNum):
         import socket
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
             return s.connect_ex(('localhost', portNum)) == 0
-    except Exception as ex:
+    except Exception:
         return True
 
 def is_ipv6_supported():
@@ -2508,7 +2517,7 @@ def is_ipv6_supported():
         sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
         sock.close()
         return True
-    except Exception as ex:
+    except Exception:
         return False
 
 def RunServerMultiThreaded(addr, port):
@@ -2542,7 +2551,7 @@ def RunServerMultiThreaded(addr, port):
         try:
             ipv6_sock.bind((addr, port))
             ipv6_sock.listen(numThreads)
-        except Exception as ex:
+        except Exception:
             ipv6_sock = None
             print("IPv6 Socket Failed to Bind. IPv6 will be unavailable.")
 
@@ -2619,7 +2628,7 @@ def show_gui():
         import darkdetect as darkdt
         darkdt.isDark()
         pass
-    except Exception as e:
+    except Exception:
         pass
 
     import customtkinter as ctk
@@ -2727,7 +2736,7 @@ def show_gui():
     blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
     blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
     contextsize_text = ["256", "512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768", "49152", "65536", "98304", "131072"]
-    antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
+    antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if opt not in runopts]
     quantkv_text = ["F16 (Off)","8-Bit","4-Bit"]
 
     if not any(runopts):
@@ -2942,8 +2951,8 @@ def show_gui():
     def setup_backend_tooltip(parent):
         # backend count label with the tooltip function
         nl = '\n'
-        tooltxt = f"Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
-        num_backends_built = makelabel(parent, str(len(runopts)) + f"/8", 5, 2,tooltxt)
+        tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
+        num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt)
         num_backends_built.grid(row=1, column=1, padx=195, pady=0)
         num_backends_built.configure(text_color="#00ff00")
 
@@ -2967,17 +2976,17 @@ def show_gui():
             layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
             quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
             if sys.platform=="darwin" and gpulayers_var.get()=="-1":
-                quick_layercounter_label.configure(text=f"(Auto: All Layers)")
-                layercounter_label.configure(text=f"(Auto: All Layers)")
+                quick_layercounter_label.configure(text="(Auto: All Layers)")
+                layercounter_label.configure(text="(Auto: All Layers)")
             elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers>0:
                 quick_layercounter_label.configure(text=f"(Auto: {predicted_gpu_layers}{max_gpu_layers} Layers)")
                 layercounter_label.configure(text=f"(Auto: {predicted_gpu_layers}{max_gpu_layers} Layers)")
             elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers<=0 and (modelfile_extracted_meta and modelfile_extracted_meta[1]):
-                quick_layercounter_label.configure(text=f"(Auto: No Offload)")
-                layercounter_label.configure(text=f"(Auto: No Offload)")
+                quick_layercounter_label.configure(text="(Auto: No Offload)")
+                layercounter_label.configure(text="(Auto: No Offload)")
             elif gpu_be and gpulayers_var.get()=="":
-                quick_layercounter_label.configure(text=f"(Set -1 for Auto)")
-                layercounter_label.configure(text=f"(Set -1 for Auto)")
+                quick_layercounter_label.configure(text="(Set -1 for Auto)")
+                layercounter_label.configure(text="(Set -1 for Auto)")
             else:
                 layercounter_label.grid_remove()
                 quick_layercounter_label.grid_remove()
@@ -3000,7 +3009,7 @@ def show_gui():
                 else:
                     quick_gpuname_label.configure(text=CUDevicesNames[s])
                     gpuname_label.configure(text=CUDevicesNames[s])
-            except Exception as ex:
+            except Exception:
                 pass
         else:
             quick_gpuname_label.configure(text="")
@@ -3395,7 +3404,7 @@ def show_gui():
         savdict["tensor_split"] = None
         savdict["config"] = None
         filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
-        if filename == None:
+        if filename is None:
             return
         file = open(str(filename.name), 'a')
         file.write(json.dumps(savdict))
@@ -3501,10 +3510,10 @@ def show_gui():
         args.chatcompletionsadapter = None if chatcompletionsadapter_var.get() == "" else chatcompletionsadapter_var.get()
         try:
             if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
-                print(f"Embedding chat completions adapter...") # parse and save embedded preload story
+                print("Embedding chat completions adapter...") # parse and save embedded preload story
                 with open(args.chatcompletionsadapter, 'r') as f:
                     args.chatcompletionsadapter = json.load(f)
-        except Exception as ex2:
+        except Exception:
             pass
 
         args.model_param = None if model_var.get() == "" else model_var.get()
@@ -3512,10 +3521,10 @@ def show_gui():
         args.preloadstory = None if preloadstory_var.get() == "" else preloadstory_var.get()
         try:
             if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
-                print(f"Embedding preload story...") # parse and save embedded preload story
+                print("Embedding preload story...") # parse and save embedded preload story
                 with open(args.preloadstory, 'r') as f:
                     args.preloadstory = json.load(f)
-        except Exception as ex2:
+        except Exception:
             pass
         args.mmproj = None if mmproj_var.get() == "" else mmproj_var.get()
         args.draftmodel = None if draftmodel_var.get() == "" else draftmodel_var.get()
@@ -3732,7 +3741,8 @@ def show_gui():
         savdict = json.loads(json.dumps(args.__dict__))
         file_type = [("KoboldCpp Settings", "*.kcpps")]
         filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
-        if filename == None: return
+        if filename is None:
+            return
         file = open(str(filename.name), 'a')
         file.write(json.dumps(savdict))
         file.close()
@@ -3754,19 +3764,19 @@ def show_gui():
         try:
             import webbrowser as wb
             wb.open("https://github.com/LostRuins/koboldcpp/wiki")
-        except:
+        except Exception:
             print("Cannot launch help in browser.")
     def display_help_models():
         try:
             import webbrowser as wb
             wb.open("https://github.com/LostRuins/koboldcpp/wiki#what-models-does-koboldcpp-support-what-architectures-are-supported")
-        except:
+        except Exception:
             print("Cannot launch help in browser.")
     def display_updates():
         try:
             import webbrowser as wb
             wb.open("https://github.com/LostRuins/koboldcpp/releases/latest")
-        except:
+        except Exception:
             print("Cannot launch updates in browser.")
 
     ctk.CTkButton(tabs , text = "Launch", fg_color="#2f8d3c", hover_color="#2faa3c", command = guilaunch, width=80, height = 35 ).grid(row=1,column=1, stick="se", padx= 25, pady=5)
@@ -3820,7 +3830,7 @@ def show_gui_msgbox(title,message):
         messagebox.showerror(title=title, message=message)
         root.withdraw()
         root.quit()
-    except Exception as ex2:
+    except Exception:
         pass
 
def show_gui_yesnobox(title,message):
@@ -3834,7 +3844,7 @@ def show_gui_yesnobox(title,message):
         root.withdraw()
         root.quit()
         return result
-    except Exception as ex2:
+    except Exception:
         return False
         pass
 
@@ -3842,7 +3852,8 @@ def print_with_time(txt):
     print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt, flush=True)
 
 def make_url_request(url, data, method='POST', headers={}):
-    import urllib.request, ssl
+    import urllib.request
+    import ssl
     global nocertify
     try:
         request = None
@@ -3889,7 +3900,7 @@ def run_horde_worker(args, api_key, worker_name):
         reply = make_url_request_horde(url, submit_dict)
         if not reply:
             punishcounter += 1
-            print_with_time(f"Error, Job submit failed.")
+            print_with_time("Error, Job submit failed.")
         else:
             reward = reply["reward"]
             session_kudos_earned += reward
@@ -3925,7 +3936,7 @@ def run_horde_worker(args, api_key, worker_name):
     sleepy_counter = 0 #if this exceeds a value, worker becomes sleepy (slower)
     exitcounter = 0
     print(f"===\nEmbedded Horde Worker '{worker_name}' Starting...\n(To use your own Horde Bridge/Scribe worker instead, don't set your API key)\n")
-    BRIDGE_AGENT = f"KoboldCppEmbedWorker:2:https://github.com/LostRuins/koboldcpp"
+    BRIDGE_AGENT = "KoboldCppEmbedWorker:2:https://github.com/LostRuins/koboldcpp"
     cluster = "https://aihorde.net"
     while exitcounter < 10:
         time.sleep(3)
@@ -3944,10 +3955,10 @@ def run_horde_worker(args, api_key, worker_name):
             if exitcounter < 10:
                 penaltytime = (2 ** exitcounter)
                 print_with_time(f"Horde Worker Paused for {penaltytime} min - Too many errors. It will resume automatically, but you should restart it.")
-                print_with_time(f"Caution: Too many failed jobs may lead to entering maintenance mode.")
+                print_with_time("Caution: Too many failed jobs may lead to entering maintenance mode.")
                 time.sleep(60 * penaltytime)
             else:
-                print_with_time(f"Horde Worker Exit limit reached, too many errors.")
+                print_with_time("Horde Worker Exit limit reached, too many errors.")
 
         global last_non_horde_req_time
         sec_since_non_horde = time.time() - last_non_horde_req_time
@@ -3983,13 +3994,13 @@ def run_horde_worker(args, api_key, worker_name):
                 time.sleep(slp)
                 sleepy_counter += 1
                 if sleepy_counter==20:
-                    print_with_time(f"No recent jobs, entering low power mode...")
+                    print_with_time("No recent jobs, entering low power mode...")
             continue
 
         sleepy_counter = 0
         current_id = pop['id']
         current_payload = pop['payload']
-        print(f"") #empty newline
+        print("") #empty newline
         print_with_time(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
 
         #do gen
@@ -4005,11 +4016,11 @@ def run_horde_worker(args, api_key, worker_name):
                 if currentjob_attempts>5:
                     break
 
-                print_with_time(f"Server Busy - Not ready to generate...")
+                print_with_time("Server Busy - Not ready to generate...")
                 time.sleep(5)
 
         #submit reply
-        print(f"") #empty newline
+        print("") #empty newline
         if current_generation:
             submit_dict = {
                 "id": current_id,
@@ -4020,15 +4031,15 @@ def run_horde_worker(args, api_key, worker_name):
             submit_thread = threading.Thread(target=submit_completed_generation, args=(submiturl, current_id, session_starttime, submit_dict))
             submit_thread.start() #submit job in new thread so nothing is waiting
         else:
-            print_with_time(f"Error, Abandoned current job due to errors. Getting new job.")
+            print_with_time("Error, Abandoned current job due to errors. Getting new job.")
         current_id = None
         current_payload = None
         time.sleep(0.1)
 
     if exitcounter<100:
-        print_with_time(f"Horde Worker Shutdown - Too many errors.")
+        print_with_time("Horde Worker Shutdown - Too many errors.")
     else:
-        print_with_time(f"Horde Worker Shutdown - Server Closing.")
+        print_with_time("Horde Worker Shutdown - Server Closing.")
     exitcounter = 999
     time.sleep(3)
     sys.exit(2)
@@ -4071,7 +4082,7 @@ def check_deprecation_warning():
     # but i am not going to troubleshoot or provide support for deprecated flags.
     global using_outdated_flags
     if using_outdated_flags:
-        print(f"\n=== !!! IMPORTANT WARNING !!! ===")
+        print("\n=== !!! IMPORTANT WARNING !!! ===")
         print("You are using one or more OUTDATED config files or launch flags!")
         print("The flags --hordeconfig and --sdconfig have been DEPRECATED, and MAY be REMOVED in future!")
         print("They will still work for now, but you SHOULD switch to the updated flags instead, to avoid future issues!")
@@ -4086,7 +4097,8 @@ def setuptunnel(has_sd):
     # This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
     # It should work out of the box on both linux and windows
     try:
-        import subprocess, re
+        import subprocess
+        import re
         global sslvalid
         httpsaffix = ("https" if sslvalid else "http")
         def run_tunnel():
@@ -4253,7 +4265,9 @@ def delete_old_pyinstaller():
     if not base_path:
         return
 
-    import time, os, shutil
+    import time
+    import os
+    import shutil
     selfdirpath = os.path.abspath(base_path)
     temp_parentdir_path = os.path.abspath(os.path.join(base_path, '..'))
     for dirname in os.listdir(temp_parentdir_path):
@@ -4369,7 +4383,7 @@ def main(launch_args,start_server=True):
             ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\nPlease check command line: script.py --help"
             show_gui_msgbox("Warning, GUI failed to start",ermsg)
             if args.skiplauncher:
-                print(f"Note: In order to use --skiplauncher, you need to specify a model with --model")
+                print("Note: In order to use --skiplauncher, you need to specify a model with --model")
             time.sleep(3)
             sys.exit(2)
 
@@ -4383,7 +4397,7 @@ def main(launch_args,start_server=True):
                 preloaded_story = f.read()
             canload = True
         elif isinstance(args.preloadstory, str):
-            print(f"Preloading saved story as JSON into server...")
+            print("Preloading saved story as JSON into server...")
             try:
                 import ast
                 parsed = ast.literal_eval(args.preloadstory)
@@ -4400,7 +4414,7 @@ def main(launch_args,start_server=True):
         if canload:
             print("Saved story preloaded.")
         else:
-            print(f"Warning: Saved story file invalid or not found. No story will be preloaded into server.")
+            print("Warning: Saved story file invalid or not found. No story will be preloaded into server.")
 
     # try to read chat completions adapter
     if args.chatcompletionsadapter:
@@ -4439,9 +4453,9 @@ def main(launch_args,start_server=True):
             except Exception as ex:
                 print(ex)
         if canload:
-            print(f"Chat Completions Adapter Loaded")
+            print("Chat Completions Adapter Loaded")
         else:
-            print(f"Warning: Chat Completions Adapter invalid or not found.")
+            print("Warning: Chat Completions Adapter invalid or not found.")
 
     # handle model downloads if needed
     if args.model_param and args.model_param!="":
@@ -4544,7 +4558,7 @@ def main(launch_args,start_server=True):
             print("WARNING: GPU layers is set, but a GPU backend was not selected! GPU will not be used!")
         args.gpulayers = 0
     elif args.gpulayers==-1 and sys.platform=="darwin" and args.model_param and os.path.exists(args.model_param):
-        print(f"MacOS detected: Auto GPU layers set to maximum")
+        print("MacOS detected: Auto GPU layers set to maximum")
         args.gpulayers = 200
     elif not shouldavoidgpu and args.model_param and os.path.exists(args.model_param):
         if (args.usecublas is None) and (args.usevulkan is None) and (args.useclblast is None):
@@ -4560,7 +4574,7 @@ def main(launch_args,start_server=True):
                 print(f"Auto Recommended GPU Layers: {layeramt}")
                 args.gpulayers = layeramt
             else:
-                print(f"No GPU backend found, or could not automatically determine GPU layers. Please set it manually.")
+                print("No GPU backend found, or could not automatically determine GPU layers. Please set it manually.")
                 args.gpulayers = 0
 
     if args.threads == -1:
@@ -4654,27 +4668,27 @@ def main(launch_args,start_server=True):
             if os.path.exists(args.sdlora):
                 imglora = os.path.abspath(args.sdlora)
             else:
-                print(f"Missing SD LORA model file...")
+                print("Missing SD LORA model file...")
         if args.sdvae:
             if os.path.exists(args.sdvae):
                 imgvae = os.path.abspath(args.sdvae)
             else:
-                print(f"Missing SD VAE model file...")
+                print("Missing SD VAE model file...")
         if args.sdt5xxl:
             if os.path.exists(args.sdt5xxl):
                 imgt5xxl = os.path.abspath(args.sdt5xxl)
             else:
-                print(f"Missing SD T5-XXL model file...")
+                print("Missing SD T5-XXL model file...")
         if args.sdclipl:
             if os.path.exists(args.sdclipl):
                 imgclipl = os.path.abspath(args.sdclipl)
             else:
-                print(f"Missing SD Clip-L model file...")
+                print("Missing SD Clip-L model file...")
         if args.sdclipg:
             if os.path.exists(args.sdclipg):
                 imgclipg = os.path.abspath(args.sdclipg)
             else:
-                print(f"Missing SD Clip-G model file...")
+                print("Missing SD Clip-G model file...")
 
         imgmodel = os.path.abspath(imgmodel)
         fullsdmodelpath = imgmodel
@@ -4719,7 +4733,7 @@ def main(launch_args,start_server=True):
             embedded_kailite = embedded_kailite.replace(origStr, patchedStr)
             embedded_kailite = embedded_kailite.encode()
             print("Embedded KoboldAI Lite loaded.")
-    except Exception as e:
+    except Exception:
         print("Could not find KoboldAI Lite. Embedded KoboldAI Lite will not be available.")
 
     try:
@@ -4727,7 +4741,7 @@ def main(launch_args,start_server=True):
         with open(os.path.join(basepath, "kcpp_docs.embd"), mode='rb') as f:
             embedded_kcpp_docs = f.read()
         print("Embedded API docs loaded.")
-    except Exception as e:
+    except Exception:
         print("Could not find Embedded KoboldCpp API docs.")
 
     try:
@@ -4736,7 +4750,7 @@ def main(launch_args,start_server=True):
             embedded_kcpp_sdui = f.read()
         if args.sdmodel:
             print("Embedded SDUI loaded.")
-    except Exception as e:
+    except Exception:
         print("Could not find Embedded SDUI.")
 
     if args.port_param!=defaultport:
@@ -4765,7 +4779,7 @@ def main(launch_args,start_server=True):
         try:
             import webbrowser as wb
             wb.open(epurl)
-        except:
+        except Exception:
            print("--launch was set, but could not launch web browser automatically.")
 
     if args.hordekey and args.hordekey!="":
@@ -4805,12 +4819,12 @@ def main(launch_args,start_server=True):
     benchbaneos = False
     if args.benchmark:
         if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
-            print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
+            print("\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
             save_to_file = False
         if save_to_file:
             print(f"\nRunning benchmark (Save to File: {args.benchmark})...")
         else:
-            print(f"\nRunning benchmark (Not Saved)...")
+            print("\nRunning benchmark (Not Saved)...")
         if benchprompt=="":
             benchprompt = " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1"
         for i in range(0,14): #generate massive prompt
@@ -4856,7 +4870,7 @@ def main(launch_args,start_server=True):
             with open(args.benchmark, "a") as file:
                 file.seek(0, 2)
                 if file.tell() == 0: #empty file
-                    file.write(f"Timestamp,Backend,Layers,Model,MaxCtx,GenAmount,ProcessingTime,ProcessingSpeed,GenerationTime,GenerationSpeed,TotalTime,Output,Flags")
+                    file.write("Timestamp,Backend,Layers,Model,MaxCtx,GenAmount,ProcessingTime,ProcessingSpeed,GenerationTime,GenerationSpeed,TotalTime,Output,Flags")
                 file.write(f"\n{datetimestamp},{libname},{args.gpulayers},{benchmodel},{benchmaxctx},{benchlen},{t_pp:.2f},{s_pp:.2f},{t_gen:.2f},{s_gen:.2f},{(t_pp+t_gen):.2f},{result},{benchflagstr}")
         except Exception as e:
             print(f"Error writing benchmark to file: {e}")
@@ -4877,7 +4891,7 @@ def main(launch_args,start_server=True):
     else:
         # Flush stdout for previous win32 issue so the client can see output.
         if not args.prompt or args.benchmark:
-            print(f"Server was not started, main function complete. Idling.", flush=True)
+            print("Server was not started, main function complete. Idling.", flush=True)
 
 def run_in_queue(launch_args, input_queue, output_queue):
     main(launch_args, start_server=False)
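
For reference, a pass like this can typically be reproduced from the repository root with Ruff's standard CLI (assuming a current Ruff release, whose default configuration already selects the E4/E7/E9/F groups seen above):

    ruff check .          # report violations under the default rule set
    ruff check --fix .    # apply the available autofixes

Not every change above is autofixable in all versions; some cleanups, such as splitting one-line imports, may need to be applied by hand.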