mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
more linting with Ruff (+1 squashed commits)
Squashed commits: [43802cfe2] Applied default Ruff linting
This commit is contained in:
parent
409e393d10
commit
b7cd210cd2
1 changed file with 114 additions and 100 deletions
214
koboldcpp.py
214
koboldcpp.py
|
@ -9,11 +9,20 @@
|
||||||
# scenarios and everything Kobold and KoboldAI Lite have to offer.
|
# scenarios and everything Kobold and KoboldAI Lite have to offer.
|
||||||
|
|
||||||
import ctypes
|
import ctypes
|
||||||
import os, math, re
|
import os
|
||||||
|
import math
|
||||||
|
import re
|
||||||
import argparse
|
import argparse
|
||||||
import platform
|
import platform
|
||||||
import base64
|
import base64
|
||||||
import json, sys, http.server, time, asyncio, socket, threading
|
import struct
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import http.server
|
||||||
|
import time
|
||||||
|
import asyncio
|
||||||
|
import socket
|
||||||
|
import threading
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
@ -298,7 +307,7 @@ def restore_stdout():
|
||||||
|
|
||||||
def get_default_threads():
|
def get_default_threads():
|
||||||
physical_core_limit = 1
|
physical_core_limit = 1
|
||||||
if os.cpu_count()!=None and os.cpu_count()>1:
|
if os.cpu_count() is not None and os.cpu_count()>1:
|
||||||
physical_core_limit = os.cpu_count() // 2
|
physical_core_limit = os.cpu_count() // 2
|
||||||
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
|
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
|
||||||
processor = platform.processor()
|
processor = platform.processor()
|
||||||
|
@ -521,8 +530,8 @@ def set_backend_props(inputs):
|
||||||
|
|
||||||
if args.usevulkan: #is an empty array if using vulkan without defined gpu
|
if args.usevulkan: #is an empty array if using vulkan without defined gpu
|
||||||
s = ""
|
s = ""
|
||||||
for l in range(0,len(args.usevulkan)):
|
for it in range(0,len(args.usevulkan)):
|
||||||
s += str(args.usevulkan[l])
|
s += str(args.usevulkan[it])
|
||||||
inputs.vulkan_info = s.encode("UTF-8")
|
inputs.vulkan_info = s.encode("UTF-8")
|
||||||
else:
|
else:
|
||||||
inputs.vulkan_info = "".encode("UTF-8")
|
inputs.vulkan_info = "".encode("UTF-8")
|
||||||
|
@ -593,7 +602,7 @@ def unpack_to_dir(destpath = ""):
|
||||||
messagebox.showerror("Error", f"An error occurred while unpacking: {e}")
|
messagebox.showerror("Error", f"An error occurred while unpacking: {e}")
|
||||||
else:
|
else:
|
||||||
if cliunpack:
|
if cliunpack:
|
||||||
print(f"The target folder is not empty or invalid. Please select an empty folder.")
|
print("The target folder is not empty or invalid. Please select an empty folder.")
|
||||||
else:
|
else:
|
||||||
messagebox.showwarning("Invalid Selection", "The target folder is not empty or invalid. Please select an empty folder.")
|
messagebox.showwarning("Invalid Selection", "The target folder is not empty or invalid. Please select an empty folder.")
|
||||||
|
|
||||||
|
@ -647,8 +656,6 @@ def string_contains_or_overlaps_sequence_substring(inputstr, sequences):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
import struct
|
|
||||||
|
|
||||||
def read_gguf_metadata(file_path):
|
def read_gguf_metadata(file_path):
|
||||||
chunk_size = 8192 # read only first 8kb of file
|
chunk_size = 8192 # read only first 8kb of file
|
||||||
try:
|
try:
|
||||||
|
@ -681,7 +688,7 @@ def read_gguf_metadata(file_path):
|
||||||
key_length = read_gguf_key(b'.attention.key_length',data,8192)
|
key_length = read_gguf_key(b'.attention.key_length',data,8192)
|
||||||
val_length = read_gguf_key(b'.attention.value_length',data,8192)
|
val_length = read_gguf_key(b'.attention.value_length',data,8192)
|
||||||
return [layercount,head_count_kv, max(key_length,val_length)]
|
return [layercount,head_count_kv, max(key_length,val_length)]
|
||||||
except Exception as ex:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath):
|
def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath):
|
||||||
|
@ -705,7 +712,7 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,
|
||||||
if fsize>10000000: #dont bother with models < 10mb as they are probably bad
|
if fsize>10000000: #dont bother with models < 10mb as they are probably bad
|
||||||
ggufmeta = read_gguf_metadata(filepath)
|
ggufmeta = read_gguf_metadata(filepath)
|
||||||
modelfile_extracted_meta = [ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize] #extract done. note that meta may be null
|
modelfile_extracted_meta = [ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize] #extract done. note that meta may be null
|
||||||
except Exception as ex:
|
except Exception:
|
||||||
modelfile_extracted_meta = None
|
modelfile_extracted_meta = None
|
||||||
|
|
||||||
def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how many layers to use
|
def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how many layers to use
|
||||||
|
@ -757,7 +764,7 @@ def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how man
|
||||||
layerlimit = min(int(ratio*layers), (layers + 3))
|
layerlimit = min(int(ratio*layers), (layers + 3))
|
||||||
layerlimit = (0 if layerlimit<=2 else layerlimit)
|
layerlimit = (0 if layerlimit<=2 else layerlimit)
|
||||||
return layerlimit
|
return layerlimit
|
||||||
except Exception as ex:
|
except Exception:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def fetch_gpu_properties(testCL,testCU,testVK):
|
def fetch_gpu_properties(testCL,testCU,testVK):
|
||||||
|
@ -773,7 +780,7 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
||||||
FetchedCUdevices = [line.split(",")[0].strip() for line in output.splitlines()]
|
FetchedCUdevices = [line.split(",")[0].strip() for line in output.splitlines()]
|
||||||
FetchedCUdeviceMem = [line.split(",")[1].strip().split(" ")[0].strip() for line in output.splitlines()]
|
FetchedCUdeviceMem = [line.split(",")[1].strip().split(" ")[0].strip() for line in output.splitlines()]
|
||||||
FetchedCUfreeMem = [line.split(",")[2].strip().split(" ")[0].strip() for line in output.splitlines()]
|
FetchedCUfreeMem = [line.split(",")[2].strip().split(" ")[0].strip() for line in output.splitlines()]
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if len(FetchedCUdevices)==0:
|
if len(FetchedCUdevices)==0:
|
||||||
try: # Get AMD ROCm GPU names
|
try: # Get AMD ROCm GPU names
|
||||||
|
@ -781,16 +788,18 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
||||||
device_name = None
|
device_name = None
|
||||||
for line in output.splitlines(): # read through the output line by line
|
for line in output.splitlines(): # read through the output line by line
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line.startswith("Marketing Name:"): device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name
|
if line.startswith("Marketing Name:"):
|
||||||
|
device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name
|
||||||
elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: # if the following Device Type is a GPU (not a CPU) then add it to devices list
|
elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: # if the following Device Type is a GPU (not a CPU) then add it to devices list
|
||||||
FetchedCUdevices.append(device_name)
|
FetchedCUdevices.append(device_name)
|
||||||
AMDgpu = True
|
AMDgpu = True
|
||||||
elif line.startswith("Device Type:") and "GPU" not in line: device_name = None
|
elif line.startswith("Device Type:") and "GPU" not in line:
|
||||||
|
device_name = None
|
||||||
if FetchedCUdevices:
|
if FetchedCUdevices:
|
||||||
getamdvram = subprocess.run(['rocm-smi', '--showmeminfo', 'vram', '--csv'], capture_output=True, text=True, check=True, encoding='utf-8').stdout # fetch VRAM of devices
|
getamdvram = subprocess.run(['rocm-smi', '--showmeminfo', 'vram', '--csv'], capture_output=True, text=True, check=True, encoding='utf-8').stdout # fetch VRAM of devices
|
||||||
if getamdvram:
|
if getamdvram:
|
||||||
FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
|
FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
lowestcumem = 0
|
lowestcumem = 0
|
||||||
lowestfreecumem = 0
|
lowestfreecumem = 0
|
||||||
|
@ -823,7 +832,7 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
||||||
if idx<len(VKIsDGPU):
|
if idx<len(VKIsDGPU):
|
||||||
VKIsDGPU[idx] = (1 if dvtype=="PHYSICAL_DEVICE_TYPE_DISCRETE_GPU" else 0)
|
VKIsDGPU[idx] = (1 if dvtype=="PHYSICAL_DEVICE_TYPE_DISCRETE_GPU" else 0)
|
||||||
idx += 1
|
idx += 1
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if testCL:
|
if testCL:
|
||||||
|
@ -834,7 +843,7 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
||||||
try:
|
try:
|
||||||
output = subprocess.run(["clinfo","--json"], capture_output=True, text=True, check=True, encoding='utf-8').stdout
|
output = subprocess.run(["clinfo","--json"], capture_output=True, text=True, check=True, encoding='utf-8').stdout
|
||||||
data = json.loads(output)
|
data = json.loads(output)
|
||||||
except Exception as e1:
|
except Exception:
|
||||||
output = subprocess.run([((os.path.join(basepath, "winclinfo.exe")) if os.name == 'nt' else "clinfo"),"--json"], capture_output=True, text=True, check=True, creationflags=subprocess.CREATE_NO_WINDOW | subprocess.DETACHED_PROCESS, encoding='utf-8').stdout
|
output = subprocess.run([((os.path.join(basepath, "winclinfo.exe")) if os.name == 'nt' else "clinfo"),"--json"], capture_output=True, text=True, check=True, creationflags=subprocess.CREATE_NO_WINDOW | subprocess.DETACHED_PROCESS, encoding='utf-8').stdout
|
||||||
data = json.loads(output)
|
data = json.loads(output)
|
||||||
plat = 0
|
plat = 0
|
||||||
|
@ -852,7 +861,7 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
||||||
dev += 1
|
dev += 1
|
||||||
plat += 1
|
plat += 1
|
||||||
MaxMemory[0] = max(lowestclmem,MaxMemory[0])
|
MaxMemory[0] = max(lowestclmem,MaxMemory[0])
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -1318,12 +1327,12 @@ def extract_json_from_string(input_string):
|
||||||
try: # First check if model exported perfect json
|
try: # First check if model exported perfect json
|
||||||
parsed_json = json.loads(input_string)
|
parsed_json = json.loads(input_string)
|
||||||
return parsed_json
|
return parsed_json
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
try: # Next check if all we need is to add brackets to make it perfect json
|
try: # Next check if all we need is to add brackets to make it perfect json
|
||||||
parsed_json = json.loads(f"[{input_string}]")
|
parsed_json = json.loads(f"[{input_string}]")
|
||||||
return parsed_json
|
return parsed_json
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
# Now use regular expression to match JSON objects or arrays in case part is valid json and part is not
|
# Now use regular expression to match JSON objects or arrays in case part is valid json and part is not
|
||||||
|
@ -1333,9 +1342,9 @@ def extract_json_from_string(input_string):
|
||||||
try:
|
try:
|
||||||
parsed_json = json.loads(potential_json)
|
parsed_json = json.loads(potential_json)
|
||||||
return parsed_json
|
return parsed_json
|
||||||
except Exception as e:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
@ -1383,7 +1392,7 @@ def transform_genparams(genparams, api_format):
|
||||||
rp3 = genparams.get('rep_pen', 1.0)
|
rp3 = genparams.get('rep_pen', 1.0)
|
||||||
rp_max = max(rp1,rp2,rp3)
|
rp_max = max(rp1,rp2,rp3)
|
||||||
genparams["rep_pen"] = rp_max
|
genparams["rep_pen"] = rp_max
|
||||||
if "use_default_badwordsids" in genparams and not ("ban_eos_token" in genparams):
|
if "use_default_badwordsids" in genparams and "ban_eos_token" not in genparams:
|
||||||
genparams["ban_eos_token"] = genparams.get('use_default_badwordsids', False)
|
genparams["ban_eos_token"] = genparams.get('use_default_badwordsids', False)
|
||||||
|
|
||||||
if api_format==1:
|
if api_format==1:
|
||||||
|
@ -1451,7 +1460,7 @@ def transform_genparams(genparams, api_format):
|
||||||
if message['role'] == "user" and message_index == len(messages_array):
|
if message['role'] == "user" and message_index == len(messages_array):
|
||||||
# Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
|
# Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
|
||||||
tools_array = genparams.get('tools', [])
|
tools_array = genparams.get('tools', [])
|
||||||
if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) != None:
|
if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) is not None:
|
||||||
response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
|
response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
|
||||||
json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
|
json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
|
||||||
tools_string = json.dumps(tools_array, indent=0)
|
tools_string = json.dumps(tools_array, indent=0)
|
||||||
|
@ -1461,7 +1470,7 @@ def transform_genparams(genparams, api_format):
|
||||||
try:
|
try:
|
||||||
specified_function = genparams.get('tool_choice').get('function').get('name')
|
specified_function = genparams.get('tool_choice').get('function').get('name')
|
||||||
json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0)
|
json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
# In case of any issues, just revert back to no specified function
|
# In case of any issues, just revert back to no specified function
|
||||||
pass
|
pass
|
||||||
messages_string += json_formatting_instruction
|
messages_string += json_formatting_instruction
|
||||||
|
@ -1671,7 +1680,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
self.wfile.flush()
|
self.wfile.flush()
|
||||||
|
|
||||||
async def send_kai_sse_event(self, data):
|
async def send_kai_sse_event(self, data):
|
||||||
self.wfile.write(f'event: message\n'.encode())
|
self.wfile.write('event: message\n'.encode())
|
||||||
self.wfile.write(f'data: {data}\n\n'.encode())
|
self.wfile.write(f'data: {data}\n\n'.encode())
|
||||||
self.wfile.flush()
|
self.wfile.flush()
|
||||||
|
|
||||||
|
@ -1803,11 +1812,11 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
auth_header = self.headers['Authorization']
|
auth_header = self.headers['Authorization']
|
||||||
elif 'authorization' in self.headers:
|
elif 'authorization' in self.headers:
|
||||||
auth_header = self.headers['authorization']
|
auth_header = self.headers['authorization']
|
||||||
if auth_header != None and auth_header.startswith('Bearer '):
|
if auth_header is not None and auth_header.startswith('Bearer '):
|
||||||
token = auth_header[len('Bearer '):].strip()
|
token = auth_header[len('Bearer '):].strip()
|
||||||
if token==password:
|
if token==password:
|
||||||
auth_ok = True
|
auth_ok = True
|
||||||
if auth_ok==False:
|
if auth_ok is False:
|
||||||
self.send_response(401)
|
self.send_response(401)
|
||||||
self.end_headers(content_type='application/json')
|
self.end_headers(content_type='application/json')
|
||||||
self.wfile.write(json.dumps({"detail": {
|
self.wfile.write(json.dumps({"detail": {
|
||||||
|
@ -1847,7 +1856,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
epurl = f"{httpsaffix}://localhost:{args.port}"
|
epurl = f"{httpsaffix}://localhost:{args.port}"
|
||||||
if args.host!="":
|
if args.host!="":
|
||||||
epurl = f"{httpsaffix}://{args.host}:{args.port}"
|
epurl = f"{httpsaffix}://{args.host}:{args.port}"
|
||||||
gen_payload = {"prompt": prompt,"max_length": max_length,"temperature": temperature,"prompt": prompt,"top_k": top_k,"top_p": top_p,"rep_pen": rep_pen,"ban_eos_token":ban_eos_token}
|
gen_payload = {"prompt": prompt,"max_length": max_length,"temperature": temperature,"top_k": top_k,"top_p": top_p,"rep_pen": rep_pen,"ban_eos_token":ban_eos_token}
|
||||||
respjson = make_url_request(f'{epurl}/api/v1/generate', gen_payload)
|
respjson = make_url_request(f'{epurl}/api/v1/generate', gen_payload)
|
||||||
reply = html.escape(respjson["results"][0]["text"])
|
reply = html.escape(respjson["results"][0]["text"])
|
||||||
status = "Generation Completed"
|
status = "Generation Completed"
|
||||||
|
@ -1928,7 +1937,7 @@ Enter Prompt:<br>
|
||||||
auth_header = self.headers['Authorization']
|
auth_header = self.headers['Authorization']
|
||||||
elif 'authorization' in self.headers:
|
elif 'authorization' in self.headers:
|
||||||
auth_header = self.headers['authorization']
|
auth_header = self.headers['authorization']
|
||||||
if auth_header != None and auth_header.startswith('Bearer '):
|
if auth_header is not None and auth_header.startswith('Bearer '):
|
||||||
token = auth_header[len('Bearer '):].strip()
|
token = auth_header[len('Bearer '):].strip()
|
||||||
if token==password:
|
if token==password:
|
||||||
auth_ok = True
|
auth_ok = True
|
||||||
|
@ -2048,20 +2057,20 @@ Enter Prompt:<br>
|
||||||
elif self.path=="/api" or self.path=="/docs" or self.path.startswith(('/api/?json=','/api?json=','/docs/?json=','/docs?json=')):
|
elif self.path=="/api" or self.path=="/docs" or self.path.startswith(('/api/?json=','/api?json=','/docs/?json=','/docs?json=')):
|
||||||
content_type = 'text/html'
|
content_type = 'text/html'
|
||||||
if embedded_kcpp_docs is None:
|
if embedded_kcpp_docs is None:
|
||||||
response_body = (f"KoboldCpp API is running!\n\nAPI usage reference can be found at the wiki: https://github.com/LostRuins/koboldcpp/wiki").encode()
|
response_body = ("KoboldCpp API is running!\n\nAPI usage reference can be found at the wiki: https://github.com/LostRuins/koboldcpp/wiki").encode()
|
||||||
else:
|
else:
|
||||||
response_body = embedded_kcpp_docs
|
response_body = embedded_kcpp_docs
|
||||||
|
|
||||||
elif self.path.startswith(("/sdui")):
|
elif self.path.startswith(("/sdui")):
|
||||||
content_type = 'text/html'
|
content_type = 'text/html'
|
||||||
if embedded_kcpp_sdui is None:
|
if embedded_kcpp_sdui is None:
|
||||||
response_body = (f"KoboldCpp API is running, but KCPP SDUI is not loaded").encode()
|
response_body = ("KoboldCpp API is running, but KCPP SDUI is not loaded").encode()
|
||||||
else:
|
else:
|
||||||
response_body = embedded_kcpp_sdui
|
response_body = embedded_kcpp_sdui
|
||||||
|
|
||||||
elif self.path=="/v1":
|
elif self.path=="/v1":
|
||||||
content_type = 'text/html'
|
content_type = 'text/html'
|
||||||
response_body = (f"KoboldCpp OpenAI compatible endpoint is running!\n\nFor usage reference, see https://platform.openai.com/docs/api-reference").encode()
|
response_body = ("KoboldCpp OpenAI compatible endpoint is running!\n\nFor usage reference, see https://platform.openai.com/docs/api-reference").encode()
|
||||||
|
|
||||||
elif self.path=="/api/extra/preloadstory":
|
elif self.path=="/api/extra/preloadstory":
|
||||||
if preloaded_story is None:
|
if preloaded_story is None:
|
||||||
|
@ -2128,7 +2137,7 @@ Enter Prompt:<br>
|
||||||
self.rfile.readline()
|
self.rfile.readline()
|
||||||
if chunk_length == 0:
|
if chunk_length == 0:
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception:
|
||||||
self.send_response(500)
|
self.send_response(500)
|
||||||
self.end_headers(content_type='application/json')
|
self.end_headers(content_type='application/json')
|
||||||
self.wfile.write(json.dumps({"detail": {
|
self.wfile.write(json.dumps({"detail": {
|
||||||
|
@ -2177,7 +2186,7 @@ Enter Prompt:<br>
|
||||||
tempbody = json.loads(body)
|
tempbody = json.loads(body)
|
||||||
if isinstance(tempbody, dict):
|
if isinstance(tempbody, dict):
|
||||||
multiuserkey = tempbody.get('genkey', "")
|
multiuserkey = tempbody.get('genkey', "")
|
||||||
except Exception as e:
|
except Exception:
|
||||||
multiuserkey = ""
|
multiuserkey = ""
|
||||||
pass
|
pass
|
||||||
if (multiuserkey=="" and requestsinqueue==0) or (multiuserkey!="" and multiuserkey==currentusergenkey):
|
if (multiuserkey=="" and requestsinqueue==0) or (multiuserkey!="" and multiuserkey==currentusergenkey):
|
||||||
|
@ -2200,7 +2209,7 @@ Enter Prompt:<br>
|
||||||
tempbody = json.loads(body)
|
tempbody = json.loads(body)
|
||||||
if isinstance(tempbody, dict):
|
if isinstance(tempbody, dict):
|
||||||
multiuserkey = tempbody.get('genkey', "")
|
multiuserkey = tempbody.get('genkey', "")
|
||||||
except Exception as e:
|
except Exception:
|
||||||
multiuserkey = ""
|
multiuserkey = ""
|
||||||
|
|
||||||
if totalgens>0:
|
if totalgens>0:
|
||||||
|
@ -2218,7 +2227,7 @@ Enter Prompt:<br>
|
||||||
tempbody = json.loads(body)
|
tempbody = json.loads(body)
|
||||||
if isinstance(tempbody, dict):
|
if isinstance(tempbody, dict):
|
||||||
multiuserkey = tempbody.get('genkey', "")
|
multiuserkey = tempbody.get('genkey', "")
|
||||||
except Exception as e:
|
except Exception:
|
||||||
multiuserkey = ""
|
multiuserkey = ""
|
||||||
|
|
||||||
if totalgens>0:
|
if totalgens>0:
|
||||||
|
@ -2240,7 +2249,7 @@ Enter Prompt:<br>
|
||||||
if isinstance(tempbody, dict):
|
if isinstance(tempbody, dict):
|
||||||
sender = tempbody.get('sender', "")
|
sender = tempbody.get('sender', "")
|
||||||
senderbusy = tempbody.get('senderbusy', False)
|
senderbusy = tempbody.get('senderbusy', False)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if sender!="" and senderbusy:
|
if sender!="" and senderbusy:
|
||||||
multiplayer_lastactive[sender] = int(time.time())
|
multiplayer_lastactive[sender] = int(time.time())
|
||||||
|
@ -2380,7 +2389,7 @@ Enter Prompt:<br>
|
||||||
genparams = None
|
genparams = None
|
||||||
try:
|
try:
|
||||||
genparams = json.loads(body)
|
genparams = json.loads(body)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
genparams = None
|
genparams = None
|
||||||
if is_transcribe: #fallback handling of file uploads
|
if is_transcribe: #fallback handling of file uploads
|
||||||
b64wav = self.extract_b64string_from_file_upload(body)
|
b64wav = self.extract_b64string_from_file_upload(body)
|
||||||
|
@ -2399,7 +2408,7 @@ Enter Prompt:<br>
|
||||||
|
|
||||||
is_quiet = args.quiet
|
is_quiet = args.quiet
|
||||||
if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1:
|
if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1:
|
||||||
utfprint(f"\nInput: " + json.dumps(genparams))
|
utfprint("\nInput: " + json.dumps(genparams))
|
||||||
|
|
||||||
if args.foreground:
|
if args.foreground:
|
||||||
bring_terminal_to_foreground()
|
bring_terminal_to_foreground()
|
||||||
|
@ -2497,7 +2506,7 @@ def is_port_in_use(portNum):
|
||||||
import socket
|
import socket
|
||||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||||
return s.connect_ex(('localhost', portNum)) == 0
|
return s.connect_ex(('localhost', portNum)) == 0
|
||||||
except Exception as ex:
|
except Exception:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def is_ipv6_supported():
|
def is_ipv6_supported():
|
||||||
|
@ -2508,7 +2517,7 @@ def is_ipv6_supported():
|
||||||
sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
|
sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
|
||||||
sock.close()
|
sock.close()
|
||||||
return True
|
return True
|
||||||
except Exception as ex:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def RunServerMultiThreaded(addr, port):
|
def RunServerMultiThreaded(addr, port):
|
||||||
|
@ -2542,7 +2551,7 @@ def RunServerMultiThreaded(addr, port):
|
||||||
try:
|
try:
|
||||||
ipv6_sock.bind((addr, port))
|
ipv6_sock.bind((addr, port))
|
||||||
ipv6_sock.listen(numThreads)
|
ipv6_sock.listen(numThreads)
|
||||||
except Exception as ex:
|
except Exception:
|
||||||
ipv6_sock = None
|
ipv6_sock = None
|
||||||
print("IPv6 Socket Failed to Bind. IPv6 will be unavailable.")
|
print("IPv6 Socket Failed to Bind. IPv6 will be unavailable.")
|
||||||
|
|
||||||
|
@ -2619,7 +2628,7 @@ def show_gui():
|
||||||
import darkdetect as darkdt
|
import darkdetect as darkdt
|
||||||
darkdt.isDark()
|
darkdt.isDark()
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
import customtkinter as ctk
|
import customtkinter as ctk
|
||||||
|
@ -2727,7 +2736,7 @@ def show_gui():
|
||||||
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
|
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
|
||||||
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
|
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
|
||||||
contextsize_text = ["256", "512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768", "49152", "65536", "98304", "131072"]
|
contextsize_text = ["256", "512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768", "49152", "65536", "98304", "131072"]
|
||||||
antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
|
antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if opt not in runopts]
|
||||||
quantkv_text = ["F16 (Off)","8-Bit","4-Bit"]
|
quantkv_text = ["F16 (Off)","8-Bit","4-Bit"]
|
||||||
|
|
||||||
if not any(runopts):
|
if not any(runopts):
|
||||||
|
@ -2942,8 +2951,8 @@ def show_gui():
|
||||||
def setup_backend_tooltip(parent):
|
def setup_backend_tooltip(parent):
|
||||||
# backend count label with the tooltip function
|
# backend count label with the tooltip function
|
||||||
nl = '\n'
|
nl = '\n'
|
||||||
tooltxt = f"Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
|
tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
|
||||||
num_backends_built = makelabel(parent, str(len(runopts)) + f"/8", 5, 2,tooltxt)
|
num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt)
|
||||||
num_backends_built.grid(row=1, column=1, padx=195, pady=0)
|
num_backends_built.grid(row=1, column=1, padx=195, pady=0)
|
||||||
num_backends_built.configure(text_color="#00ff00")
|
num_backends_built.configure(text_color="#00ff00")
|
||||||
|
|
||||||
|
@ -2967,17 +2976,17 @@ def show_gui():
|
||||||
layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
||||||
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
||||||
if sys.platform=="darwin" and gpulayers_var.get()=="-1":
|
if sys.platform=="darwin" and gpulayers_var.get()=="-1":
|
||||||
quick_layercounter_label.configure(text=f"(Auto: All Layers)")
|
quick_layercounter_label.configure(text="(Auto: All Layers)")
|
||||||
layercounter_label.configure(text=f"(Auto: All Layers)")
|
layercounter_label.configure(text="(Auto: All Layers)")
|
||||||
elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers>0:
|
elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers>0:
|
||||||
quick_layercounter_label.configure(text=f"(Auto: {predicted_gpu_layers}{max_gpu_layers} Layers)")
|
quick_layercounter_label.configure(text=f"(Auto: {predicted_gpu_layers}{max_gpu_layers} Layers)")
|
||||||
layercounter_label.configure(text=f"(Auto: {predicted_gpu_layers}{max_gpu_layers} Layers)")
|
layercounter_label.configure(text=f"(Auto: {predicted_gpu_layers}{max_gpu_layers} Layers)")
|
||||||
elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers<=0 and (modelfile_extracted_meta and modelfile_extracted_meta[1]):
|
elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers<=0 and (modelfile_extracted_meta and modelfile_extracted_meta[1]):
|
||||||
quick_layercounter_label.configure(text=f"(Auto: No Offload)")
|
quick_layercounter_label.configure(text="(Auto: No Offload)")
|
||||||
layercounter_label.configure(text=f"(Auto: No Offload)")
|
layercounter_label.configure(text="(Auto: No Offload)")
|
||||||
elif gpu_be and gpulayers_var.get()=="":
|
elif gpu_be and gpulayers_var.get()=="":
|
||||||
quick_layercounter_label.configure(text=f"(Set -1 for Auto)")
|
quick_layercounter_label.configure(text="(Set -1 for Auto)")
|
||||||
layercounter_label.configure(text=f"(Set -1 for Auto)")
|
layercounter_label.configure(text="(Set -1 for Auto)")
|
||||||
else:
|
else:
|
||||||
layercounter_label.grid_remove()
|
layercounter_label.grid_remove()
|
||||||
quick_layercounter_label.grid_remove()
|
quick_layercounter_label.grid_remove()
|
||||||
|
@ -3000,7 +3009,7 @@ def show_gui():
|
||||||
else:
|
else:
|
||||||
quick_gpuname_label.configure(text=CUDevicesNames[s])
|
quick_gpuname_label.configure(text=CUDevicesNames[s])
|
||||||
gpuname_label.configure(text=CUDevicesNames[s])
|
gpuname_label.configure(text=CUDevicesNames[s])
|
||||||
except Exception as ex:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
quick_gpuname_label.configure(text="")
|
quick_gpuname_label.configure(text="")
|
||||||
|
@ -3395,7 +3404,7 @@ def show_gui():
|
||||||
savdict["tensor_split"] = None
|
savdict["tensor_split"] = None
|
||||||
savdict["config"] = None
|
savdict["config"] = None
|
||||||
filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
|
filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
|
||||||
if filename == None:
|
if filename is None:
|
||||||
return
|
return
|
||||||
file = open(str(filename.name), 'a')
|
file = open(str(filename.name), 'a')
|
||||||
file.write(json.dumps(savdict))
|
file.write(json.dumps(savdict))
|
||||||
|
@ -3501,10 +3510,10 @@ def show_gui():
|
||||||
args.chatcompletionsadapter = None if chatcompletionsadapter_var.get() == "" else chatcompletionsadapter_var.get()
|
args.chatcompletionsadapter = None if chatcompletionsadapter_var.get() == "" else chatcompletionsadapter_var.get()
|
||||||
try:
|
try:
|
||||||
if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
|
if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
|
||||||
print(f"Embedding chat completions adapter...") # parse and save embedded preload story
|
print("Embedding chat completions adapter...") # parse and save embedded preload story
|
||||||
with open(args.chatcompletionsadapter, 'r') as f:
|
with open(args.chatcompletionsadapter, 'r') as f:
|
||||||
args.chatcompletionsadapter = json.load(f)
|
args.chatcompletionsadapter = json.load(f)
|
||||||
except Exception as ex2:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
args.model_param = None if model_var.get() == "" else model_var.get()
|
args.model_param = None if model_var.get() == "" else model_var.get()
|
||||||
|
@ -3512,10 +3521,10 @@ def show_gui():
|
||||||
args.preloadstory = None if preloadstory_var.get() == "" else preloadstory_var.get()
|
args.preloadstory = None if preloadstory_var.get() == "" else preloadstory_var.get()
|
||||||
try:
|
try:
|
||||||
if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
|
if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
|
||||||
print(f"Embedding preload story...") # parse and save embedded preload story
|
print("Embedding preload story...") # parse and save embedded preload story
|
||||||
with open(args.preloadstory, 'r') as f:
|
with open(args.preloadstory, 'r') as f:
|
||||||
args.preloadstory = json.load(f)
|
args.preloadstory = json.load(f)
|
||||||
except Exception as ex2:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
args.mmproj = None if mmproj_var.get() == "" else mmproj_var.get()
|
args.mmproj = None if mmproj_var.get() == "" else mmproj_var.get()
|
||||||
args.draftmodel = None if draftmodel_var.get() == "" else draftmodel_var.get()
|
args.draftmodel = None if draftmodel_var.get() == "" else draftmodel_var.get()
|
||||||
|
@ -3732,7 +3741,8 @@ def show_gui():
|
||||||
savdict = json.loads(json.dumps(args.__dict__))
|
savdict = json.loads(json.dumps(args.__dict__))
|
||||||
file_type = [("KoboldCpp Settings", "*.kcpps")]
|
file_type = [("KoboldCpp Settings", "*.kcpps")]
|
||||||
filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
|
filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
|
||||||
if filename == None: return
|
if filename is None:
|
||||||
|
return
|
||||||
file = open(str(filename.name), 'a')
|
file = open(str(filename.name), 'a')
|
||||||
file.write(json.dumps(savdict))
|
file.write(json.dumps(savdict))
|
||||||
file.close()
|
file.close()
|
||||||
|
@ -3754,19 +3764,19 @@ def show_gui():
|
||||||
try:
|
try:
|
||||||
import webbrowser as wb
|
import webbrowser as wb
|
||||||
wb.open("https://github.com/LostRuins/koboldcpp/wiki")
|
wb.open("https://github.com/LostRuins/koboldcpp/wiki")
|
||||||
except:
|
except Exception:
|
||||||
print("Cannot launch help in browser.")
|
print("Cannot launch help in browser.")
|
||||||
def display_help_models():
|
def display_help_models():
|
||||||
try:
|
try:
|
||||||
import webbrowser as wb
|
import webbrowser as wb
|
||||||
wb.open("https://github.com/LostRuins/koboldcpp/wiki#what-models-does-koboldcpp-support-what-architectures-are-supported")
|
wb.open("https://github.com/LostRuins/koboldcpp/wiki#what-models-does-koboldcpp-support-what-architectures-are-supported")
|
||||||
except:
|
except Exception:
|
||||||
print("Cannot launch help in browser.")
|
print("Cannot launch help in browser.")
|
||||||
def display_updates():
|
def display_updates():
|
||||||
try:
|
try:
|
||||||
import webbrowser as wb
|
import webbrowser as wb
|
||||||
wb.open("https://github.com/LostRuins/koboldcpp/releases/latest")
|
wb.open("https://github.com/LostRuins/koboldcpp/releases/latest")
|
||||||
except:
|
except Exception:
|
||||||
print("Cannot launch updates in browser.")
|
print("Cannot launch updates in browser.")
|
||||||
|
|
||||||
ctk.CTkButton(tabs , text = "Launch", fg_color="#2f8d3c", hover_color="#2faa3c", command = guilaunch, width=80, height = 35 ).grid(row=1,column=1, stick="se", padx= 25, pady=5)
|
ctk.CTkButton(tabs , text = "Launch", fg_color="#2f8d3c", hover_color="#2faa3c", command = guilaunch, width=80, height = 35 ).grid(row=1,column=1, stick="se", padx= 25, pady=5)
|
||||||
|
@ -3820,7 +3830,7 @@ def show_gui_msgbox(title,message):
|
||||||
messagebox.showerror(title=title, message=message)
|
messagebox.showerror(title=title, message=message)
|
||||||
root.withdraw()
|
root.withdraw()
|
||||||
root.quit()
|
root.quit()
|
||||||
except Exception as ex2:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def show_gui_yesnobox(title,message):
|
def show_gui_yesnobox(title,message):
|
||||||
|
@ -3834,7 +3844,7 @@ def show_gui_yesnobox(title,message):
|
||||||
root.withdraw()
|
root.withdraw()
|
||||||
root.quit()
|
root.quit()
|
||||||
return result
|
return result
|
||||||
except Exception as ex2:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -3842,7 +3852,8 @@ def print_with_time(txt):
|
||||||
print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt, flush=True)
|
print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt, flush=True)
|
||||||
|
|
||||||
def make_url_request(url, data, method='POST', headers={}):
|
def make_url_request(url, data, method='POST', headers={}):
|
||||||
import urllib.request, ssl
|
import urllib.request
|
||||||
|
import ssl
|
||||||
global nocertify
|
global nocertify
|
||||||
try:
|
try:
|
||||||
request = None
|
request = None
|
||||||
|
@ -3889,7 +3900,7 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
reply = make_url_request_horde(url, submit_dict)
|
reply = make_url_request_horde(url, submit_dict)
|
||||||
if not reply:
|
if not reply:
|
||||||
punishcounter += 1
|
punishcounter += 1
|
||||||
print_with_time(f"Error, Job submit failed.")
|
print_with_time("Error, Job submit failed.")
|
||||||
else:
|
else:
|
||||||
reward = reply["reward"]
|
reward = reply["reward"]
|
||||||
session_kudos_earned += reward
|
session_kudos_earned += reward
|
||||||
|
@ -3925,7 +3936,7 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
sleepy_counter = 0 #if this exceeds a value, worker becomes sleepy (slower)
|
sleepy_counter = 0 #if this exceeds a value, worker becomes sleepy (slower)
|
||||||
exitcounter = 0
|
exitcounter = 0
|
||||||
print(f"===\nEmbedded Horde Worker '{worker_name}' Starting...\n(To use your own Horde Bridge/Scribe worker instead, don't set your API key)\n")
|
print(f"===\nEmbedded Horde Worker '{worker_name}' Starting...\n(To use your own Horde Bridge/Scribe worker instead, don't set your API key)\n")
|
||||||
BRIDGE_AGENT = f"KoboldCppEmbedWorker:2:https://github.com/LostRuins/koboldcpp"
|
BRIDGE_AGENT = "KoboldCppEmbedWorker:2:https://github.com/LostRuins/koboldcpp"
|
||||||
cluster = "https://aihorde.net"
|
cluster = "https://aihorde.net"
|
||||||
while exitcounter < 10:
|
while exitcounter < 10:
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
|
@ -3944,10 +3955,10 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
if exitcounter < 10:
|
if exitcounter < 10:
|
||||||
penaltytime = (2 ** exitcounter)
|
penaltytime = (2 ** exitcounter)
|
||||||
print_with_time(f"Horde Worker Paused for {penaltytime} min - Too many errors. It will resume automatically, but you should restart it.")
|
print_with_time(f"Horde Worker Paused for {penaltytime} min - Too many errors. It will resume automatically, but you should restart it.")
|
||||||
print_with_time(f"Caution: Too many failed jobs may lead to entering maintenance mode.")
|
print_with_time("Caution: Too many failed jobs may lead to entering maintenance mode.")
|
||||||
time.sleep(60 * penaltytime)
|
time.sleep(60 * penaltytime)
|
||||||
else:
|
else:
|
||||||
print_with_time(f"Horde Worker Exit limit reached, too many errors.")
|
print_with_time("Horde Worker Exit limit reached, too many errors.")
|
||||||
|
|
||||||
global last_non_horde_req_time
|
global last_non_horde_req_time
|
||||||
sec_since_non_horde = time.time() - last_non_horde_req_time
|
sec_since_non_horde = time.time() - last_non_horde_req_time
|
||||||
|
@ -3983,13 +3994,13 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
time.sleep(slp)
|
time.sleep(slp)
|
||||||
sleepy_counter += 1
|
sleepy_counter += 1
|
||||||
if sleepy_counter==20:
|
if sleepy_counter==20:
|
||||||
print_with_time(f"No recent jobs, entering low power mode...")
|
print_with_time("No recent jobs, entering low power mode...")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
sleepy_counter = 0
|
sleepy_counter = 0
|
||||||
current_id = pop['id']
|
current_id = pop['id']
|
||||||
current_payload = pop['payload']
|
current_payload = pop['payload']
|
||||||
print(f"") #empty newline
|
print("") #empty newline
|
||||||
print_with_time(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
|
print_with_time(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
|
||||||
|
|
||||||
#do gen
|
#do gen
|
||||||
|
@ -4005,11 +4016,11 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
if currentjob_attempts>5:
|
if currentjob_attempts>5:
|
||||||
break
|
break
|
||||||
|
|
||||||
print_with_time(f"Server Busy - Not ready to generate...")
|
print_with_time("Server Busy - Not ready to generate...")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
#submit reply
|
#submit reply
|
||||||
print(f"") #empty newline
|
print("") #empty newline
|
||||||
if current_generation:
|
if current_generation:
|
||||||
submit_dict = {
|
submit_dict = {
|
||||||
"id": current_id,
|
"id": current_id,
|
||||||
|
@ -4020,15 +4031,15 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
submit_thread = threading.Thread(target=submit_completed_generation, args=(submiturl, current_id, session_starttime, submit_dict))
|
submit_thread = threading.Thread(target=submit_completed_generation, args=(submiturl, current_id, session_starttime, submit_dict))
|
||||||
submit_thread.start() #submit job in new thread so nothing is waiting
|
submit_thread.start() #submit job in new thread so nothing is waiting
|
||||||
else:
|
else:
|
||||||
print_with_time(f"Error, Abandoned current job due to errors. Getting new job.")
|
print_with_time("Error, Abandoned current job due to errors. Getting new job.")
|
||||||
current_id = None
|
current_id = None
|
||||||
current_payload = None
|
current_payload = None
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
if exitcounter<100:
|
if exitcounter<100:
|
||||||
print_with_time(f"Horde Worker Shutdown - Too many errors.")
|
print_with_time("Horde Worker Shutdown - Too many errors.")
|
||||||
else:
|
else:
|
||||||
print_with_time(f"Horde Worker Shutdown - Server Closing.")
|
print_with_time("Horde Worker Shutdown - Server Closing.")
|
||||||
exitcounter = 999
|
exitcounter = 999
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
@ -4071,7 +4082,7 @@ def check_deprecation_warning():
|
||||||
# but i am not going to troubleshoot or provide support for deprecated flags.
|
# but i am not going to troubleshoot or provide support for deprecated flags.
|
||||||
global using_outdated_flags
|
global using_outdated_flags
|
||||||
if using_outdated_flags:
|
if using_outdated_flags:
|
||||||
print(f"\n=== !!! IMPORTANT WARNING !!! ===")
|
print("\n=== !!! IMPORTANT WARNING !!! ===")
|
||||||
print("You are using one or more OUTDATED config files or launch flags!")
|
print("You are using one or more OUTDATED config files or launch flags!")
|
||||||
print("The flags --hordeconfig and --sdconfig have been DEPRECATED, and MAY be REMOVED in future!")
|
print("The flags --hordeconfig and --sdconfig have been DEPRECATED, and MAY be REMOVED in future!")
|
||||||
print("They will still work for now, but you SHOULD switch to the updated flags instead, to avoid future issues!")
|
print("They will still work for now, but you SHOULD switch to the updated flags instead, to avoid future issues!")
|
||||||
|
@ -4086,7 +4097,8 @@ def setuptunnel(has_sd):
|
||||||
# This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
|
# This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
|
||||||
# It should work out of the box on both linux and windows
|
# It should work out of the box on both linux and windows
|
||||||
try:
|
try:
|
||||||
import subprocess, re
|
import subprocess
|
||||||
|
import re
|
||||||
global sslvalid
|
global sslvalid
|
||||||
httpsaffix = ("https" if sslvalid else "http")
|
httpsaffix = ("https" if sslvalid else "http")
|
||||||
def run_tunnel():
|
def run_tunnel():
|
||||||
|
@ -4253,7 +4265,9 @@ def delete_old_pyinstaller():
|
||||||
if not base_path:
|
if not base_path:
|
||||||
return
|
return
|
||||||
|
|
||||||
import time, os, shutil
|
import time
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
selfdirpath = os.path.abspath(base_path)
|
selfdirpath = os.path.abspath(base_path)
|
||||||
temp_parentdir_path = os.path.abspath(os.path.join(base_path, '..'))
|
temp_parentdir_path = os.path.abspath(os.path.join(base_path, '..'))
|
||||||
for dirname in os.listdir(temp_parentdir_path):
|
for dirname in os.listdir(temp_parentdir_path):
|
||||||
|
@ -4369,7 +4383,7 @@ def main(launch_args,start_server=True):
|
||||||
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\nPlease check command line: script.py --help"
|
ermsg = "Reason: " + str(ex) + "\nFile selection GUI unsupported.\ncustomtkinter python module required!\nPlease check command line: script.py --help"
|
||||||
show_gui_msgbox("Warning, GUI failed to start",ermsg)
|
show_gui_msgbox("Warning, GUI failed to start",ermsg)
|
||||||
if args.skiplauncher:
|
if args.skiplauncher:
|
||||||
print(f"Note: In order to use --skiplauncher, you need to specify a model with --model")
|
print("Note: In order to use --skiplauncher, you need to specify a model with --model")
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
|
@ -4383,7 +4397,7 @@ def main(launch_args,start_server=True):
|
||||||
preloaded_story = f.read()
|
preloaded_story = f.read()
|
||||||
canload = True
|
canload = True
|
||||||
elif isinstance(args.preloadstory, str):
|
elif isinstance(args.preloadstory, str):
|
||||||
print(f"Preloading saved story as JSON into server...")
|
print("Preloading saved story as JSON into server...")
|
||||||
try:
|
try:
|
||||||
import ast
|
import ast
|
||||||
parsed = ast.literal_eval(args.preloadstory)
|
parsed = ast.literal_eval(args.preloadstory)
|
||||||
|
@ -4400,7 +4414,7 @@ def main(launch_args,start_server=True):
|
||||||
if canload:
|
if canload:
|
||||||
print("Saved story preloaded.")
|
print("Saved story preloaded.")
|
||||||
else:
|
else:
|
||||||
print(f"Warning: Saved story file invalid or not found. No story will be preloaded into server.")
|
print("Warning: Saved story file invalid or not found. No story will be preloaded into server.")
|
||||||
|
|
||||||
# try to read chat completions adapter
|
# try to read chat completions adapter
|
||||||
if args.chatcompletionsadapter:
|
if args.chatcompletionsadapter:
|
||||||
|
@ -4439,9 +4453,9 @@ def main(launch_args,start_server=True):
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print(ex)
|
print(ex)
|
||||||
if canload:
|
if canload:
|
||||||
print(f"Chat Completions Adapter Loaded")
|
print("Chat Completions Adapter Loaded")
|
||||||
else:
|
else:
|
||||||
print(f"Warning: Chat Completions Adapter invalid or not found.")
|
print("Warning: Chat Completions Adapter invalid or not found.")
|
||||||
|
|
||||||
# handle model downloads if needed
|
# handle model downloads if needed
|
||||||
if args.model_param and args.model_param!="":
|
if args.model_param and args.model_param!="":
|
||||||
|
@ -4544,7 +4558,7 @@ def main(launch_args,start_server=True):
|
||||||
print("WARNING: GPU layers is set, but a GPU backend was not selected! GPU will not be used!")
|
print("WARNING: GPU layers is set, but a GPU backend was not selected! GPU will not be used!")
|
||||||
args.gpulayers = 0
|
args.gpulayers = 0
|
||||||
elif args.gpulayers==-1 and sys.platform=="darwin" and args.model_param and os.path.exists(args.model_param):
|
elif args.gpulayers==-1 and sys.platform=="darwin" and args.model_param and os.path.exists(args.model_param):
|
||||||
print(f"MacOS detected: Auto GPU layers set to maximum")
|
print("MacOS detected: Auto GPU layers set to maximum")
|
||||||
args.gpulayers = 200
|
args.gpulayers = 200
|
||||||
elif not shouldavoidgpu and args.model_param and os.path.exists(args.model_param):
|
elif not shouldavoidgpu and args.model_param and os.path.exists(args.model_param):
|
||||||
if (args.usecublas is None) and (args.usevulkan is None) and (args.useclblast is None):
|
if (args.usecublas is None) and (args.usevulkan is None) and (args.useclblast is None):
|
||||||
|
@ -4560,7 +4574,7 @@ def main(launch_args,start_server=True):
|
||||||
print(f"Auto Recommended GPU Layers: {layeramt}")
|
print(f"Auto Recommended GPU Layers: {layeramt}")
|
||||||
args.gpulayers = layeramt
|
args.gpulayers = layeramt
|
||||||
else:
|
else:
|
||||||
print(f"No GPU backend found, or could not automatically determine GPU layers. Please set it manually.")
|
print("No GPU backend found, or could not automatically determine GPU layers. Please set it manually.")
|
||||||
args.gpulayers = 0
|
args.gpulayers = 0
|
||||||
|
|
||||||
if args.threads == -1:
|
if args.threads == -1:
|
||||||
|
@ -4654,27 +4668,27 @@ def main(launch_args,start_server=True):
|
||||||
if os.path.exists(args.sdlora):
|
if os.path.exists(args.sdlora):
|
||||||
imglora = os.path.abspath(args.sdlora)
|
imglora = os.path.abspath(args.sdlora)
|
||||||
else:
|
else:
|
||||||
print(f"Missing SD LORA model file...")
|
print("Missing SD LORA model file...")
|
||||||
if args.sdvae:
|
if args.sdvae:
|
||||||
if os.path.exists(args.sdvae):
|
if os.path.exists(args.sdvae):
|
||||||
imgvae = os.path.abspath(args.sdvae)
|
imgvae = os.path.abspath(args.sdvae)
|
||||||
else:
|
else:
|
||||||
print(f"Missing SD VAE model file...")
|
print("Missing SD VAE model file...")
|
||||||
if args.sdt5xxl:
|
if args.sdt5xxl:
|
||||||
if os.path.exists(args.sdt5xxl):
|
if os.path.exists(args.sdt5xxl):
|
||||||
imgt5xxl = os.path.abspath(args.sdt5xxl)
|
imgt5xxl = os.path.abspath(args.sdt5xxl)
|
||||||
else:
|
else:
|
||||||
print(f"Missing SD T5-XXL model file...")
|
print("Missing SD T5-XXL model file...")
|
||||||
if args.sdclipl:
|
if args.sdclipl:
|
||||||
if os.path.exists(args.sdclipl):
|
if os.path.exists(args.sdclipl):
|
||||||
imgclipl = os.path.abspath(args.sdclipl)
|
imgclipl = os.path.abspath(args.sdclipl)
|
||||||
else:
|
else:
|
||||||
print(f"Missing SD Clip-L model file...")
|
print("Missing SD Clip-L model file...")
|
||||||
if args.sdclipg:
|
if args.sdclipg:
|
||||||
if os.path.exists(args.sdclipg):
|
if os.path.exists(args.sdclipg):
|
||||||
imgclipg = os.path.abspath(args.sdclipg)
|
imgclipg = os.path.abspath(args.sdclipg)
|
||||||
else:
|
else:
|
||||||
print(f"Missing SD Clip-G model file...")
|
print("Missing SD Clip-G model file...")
|
||||||
|
|
||||||
imgmodel = os.path.abspath(imgmodel)
|
imgmodel = os.path.abspath(imgmodel)
|
||||||
fullsdmodelpath = imgmodel
|
fullsdmodelpath = imgmodel
|
||||||
|
@ -4719,7 +4733,7 @@ def main(launch_args,start_server=True):
|
||||||
embedded_kailite = embedded_kailite.replace(origStr, patchedStr)
|
embedded_kailite = embedded_kailite.replace(origStr, patchedStr)
|
||||||
embedded_kailite = embedded_kailite.encode()
|
embedded_kailite = embedded_kailite.encode()
|
||||||
print("Embedded KoboldAI Lite loaded.")
|
print("Embedded KoboldAI Lite loaded.")
|
||||||
except Exception as e:
|
except Exception:
|
||||||
print("Could not find KoboldAI Lite. Embedded KoboldAI Lite will not be available.")
|
print("Could not find KoboldAI Lite. Embedded KoboldAI Lite will not be available.")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -4727,7 +4741,7 @@ def main(launch_args,start_server=True):
|
||||||
with open(os.path.join(basepath, "kcpp_docs.embd"), mode='rb') as f:
|
with open(os.path.join(basepath, "kcpp_docs.embd"), mode='rb') as f:
|
||||||
embedded_kcpp_docs = f.read()
|
embedded_kcpp_docs = f.read()
|
||||||
print("Embedded API docs loaded.")
|
print("Embedded API docs loaded.")
|
||||||
except Exception as e:
|
except Exception:
|
||||||
print("Could not find Embedded KoboldCpp API docs.")
|
print("Could not find Embedded KoboldCpp API docs.")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -4736,7 +4750,7 @@ def main(launch_args,start_server=True):
|
||||||
embedded_kcpp_sdui = f.read()
|
embedded_kcpp_sdui = f.read()
|
||||||
if args.sdmodel:
|
if args.sdmodel:
|
||||||
print("Embedded SDUI loaded.")
|
print("Embedded SDUI loaded.")
|
||||||
except Exception as e:
|
except Exception:
|
||||||
print("Could not find Embedded SDUI.")
|
print("Could not find Embedded SDUI.")
|
||||||
|
|
||||||
if args.port_param!=defaultport:
|
if args.port_param!=defaultport:
|
||||||
|
@ -4765,7 +4779,7 @@ def main(launch_args,start_server=True):
|
||||||
try:
|
try:
|
||||||
import webbrowser as wb
|
import webbrowser as wb
|
||||||
wb.open(epurl)
|
wb.open(epurl)
|
||||||
except:
|
except Exception:
|
||||||
print("--launch was set, but could not launch web browser automatically.")
|
print("--launch was set, but could not launch web browser automatically.")
|
||||||
|
|
||||||
if args.hordekey and args.hordekey!="":
|
if args.hordekey and args.hordekey!="":
|
||||||
|
@ -4805,12 +4819,12 @@ def main(launch_args,start_server=True):
|
||||||
benchbaneos = False
|
benchbaneos = False
|
||||||
if args.benchmark:
|
if args.benchmark:
|
||||||
if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
|
if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
|
||||||
print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
|
print("\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
|
||||||
save_to_file = False
|
save_to_file = False
|
||||||
if save_to_file:
|
if save_to_file:
|
||||||
print(f"\nRunning benchmark (Save to File: {args.benchmark})...")
|
print(f"\nRunning benchmark (Save to File: {args.benchmark})...")
|
||||||
else:
|
else:
|
||||||
print(f"\nRunning benchmark (Not Saved)...")
|
print("\nRunning benchmark (Not Saved)...")
|
||||||
if benchprompt=="":
|
if benchprompt=="":
|
||||||
benchprompt = " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1"
|
benchprompt = " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1"
|
||||||
for i in range(0,14): #generate massive prompt
|
for i in range(0,14): #generate massive prompt
|
||||||
|
@ -4856,7 +4870,7 @@ def main(launch_args,start_server=True):
|
||||||
with open(args.benchmark, "a") as file:
|
with open(args.benchmark, "a") as file:
|
||||||
file.seek(0, 2)
|
file.seek(0, 2)
|
||||||
if file.tell() == 0: #empty file
|
if file.tell() == 0: #empty file
|
||||||
file.write(f"Timestamp,Backend,Layers,Model,MaxCtx,GenAmount,ProcessingTime,ProcessingSpeed,GenerationTime,GenerationSpeed,TotalTime,Output,Flags")
|
file.write("Timestamp,Backend,Layers,Model,MaxCtx,GenAmount,ProcessingTime,ProcessingSpeed,GenerationTime,GenerationSpeed,TotalTime,Output,Flags")
|
||||||
file.write(f"\n{datetimestamp},{libname},{args.gpulayers},{benchmodel},{benchmaxctx},{benchlen},{t_pp:.2f},{s_pp:.2f},{t_gen:.2f},{s_gen:.2f},{(t_pp+t_gen):.2f},{result},{benchflagstr}")
|
file.write(f"\n{datetimestamp},{libname},{args.gpulayers},{benchmodel},{benchmaxctx},{benchlen},{t_pp:.2f},{s_pp:.2f},{t_gen:.2f},{s_gen:.2f},{(t_pp+t_gen):.2f},{result},{benchflagstr}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error writing benchmark to file: {e}")
|
print(f"Error writing benchmark to file: {e}")
|
||||||
|
@ -4877,7 +4891,7 @@ def main(launch_args,start_server=True):
|
||||||
else:
|
else:
|
||||||
# Flush stdout for previous win32 issue so the client can see output.
|
# Flush stdout for previous win32 issue so the client can see output.
|
||||||
if not args.prompt or args.benchmark:
|
if not args.prompt or args.benchmark:
|
||||||
print(f"Server was not started, main function complete. Idling.", flush=True)
|
print("Server was not started, main function complete. Idling.", flush=True)
|
||||||
|
|
||||||
def run_in_queue(launch_args, input_queue, output_queue):
|
def run_in_queue(launch_args, input_queue, output_queue):
|
||||||
main(launch_args, start_server=False)
|
main(launch_args, start_server=False)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue