mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Merge branch 'LostRuins:concedo' into concedo
This commit is contained in:
commit
4e46673f80
40 changed files with 6555 additions and 2099 deletions
489
koboldcpp.py
489
koboldcpp.py
|
@ -12,6 +12,8 @@ import json, sys, http.server, time, asyncio, socket, threading
|
|||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
stop_token_max = 10
|
||||
sampler_order_max = 7
|
||||
ban_token_max = 10
|
||||
|
||||
class load_model_inputs(ctypes.Structure):
|
||||
_fields_ = [("threads", ctypes.c_int),
|
||||
|
@ -29,10 +31,12 @@ class load_model_inputs(ctypes.Structure):
|
|||
("use_smartcontext", ctypes.c_bool),
|
||||
("unban_tokens", ctypes.c_bool),
|
||||
("clblast_info", ctypes.c_int),
|
||||
("cublas_info", ctypes.c_int),
|
||||
("blasbatchsize", ctypes.c_int),
|
||||
("debugmode", ctypes.c_int),
|
||||
("forceversion", ctypes.c_int),
|
||||
("gpulayers", ctypes.c_int)]
|
||||
("gpulayers", ctypes.c_int),
|
||||
("banned_tokens", ctypes.c_char_p * ban_token_max)]
|
||||
|
||||
class generation_inputs(ctypes.Structure):
|
||||
_fields_ = [("seed", ctypes.c_int),
|
||||
|
@ -50,6 +54,8 @@ class generation_inputs(ctypes.Structure):
|
|||
("mirostat", ctypes.c_int),
|
||||
("mirostat_tau", ctypes.c_float),
|
||||
("mirostat_eta", ctypes.c_float),
|
||||
("sampler_order", ctypes.c_int * sampler_order_max),
|
||||
("sampler_len", ctypes.c_int),
|
||||
("stop_sequence", ctypes.c_char_p * stop_token_max),
|
||||
("stream_sse", ctypes.c_bool)]
|
||||
|
||||
|
@ -108,7 +114,7 @@ def init_library():
|
|||
else:
|
||||
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
|
||||
use_clblast = True
|
||||
elif (args.usecublas and args.usecublas!=""):
|
||||
elif (args.usecublas is not None):
|
||||
if not file_exists(lib_cublas):
|
||||
print("Warning: CuBLAS library file not found. Non-BLAS library will be used.")
|
||||
else:
|
||||
|
@ -165,7 +171,7 @@ def load_model(model_filename):
|
|||
inputs.batch_size = 8
|
||||
inputs.max_context_length = maxctx #initial value to use for ctx, can be overwritten
|
||||
inputs.threads = args.threads
|
||||
inputs.low_vram = (True if args.usecublas=="lowvram" else False)
|
||||
inputs.low_vram = (True if (args.usecublas and "lowvram" in args.usecublas) else False)
|
||||
inputs.blasthreads = args.blasthreads
|
||||
inputs.f16_kv = True
|
||||
inputs.use_mmap = (not args.nommap)
|
||||
|
@ -186,12 +192,25 @@ def load_model(model_filename):
|
|||
if args.useclblast:
|
||||
clblastids = 100 + int(args.useclblast[0])*10 + int(args.useclblast[1])
|
||||
inputs.clblast_info = clblastids
|
||||
inputs.cublas_info = 0
|
||||
if (args.usecublas and "0" in args.usecublas):
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
||||
elif (args.usecublas and "1" in args.usecublas):
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
elif (args.usecublas and "2" in args.usecublas):
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
|
||||
inputs.executable_path = (getdirpath()+"/").encode("UTF-8")
|
||||
inputs.debugmode = args.debugmode
|
||||
banned_tokens = args.bantokens
|
||||
for n in range(ban_token_max):
|
||||
if not banned_tokens or n >= len(banned_tokens):
|
||||
inputs.banned_tokens[n] = "".encode("UTF-8")
|
||||
else:
|
||||
inputs.banned_tokens[n] = banned_tokens[n].encode("UTF-8")
|
||||
ret = handle.load_model(inputs)
|
||||
return ret
|
||||
|
||||
def generate(prompt,max_length=20, max_context_length=512,temperature=0.8,top_k=120, top_a=0.0 ,top_p=0.85, typical_p=1.0, tfs=1.0 ,rep_pen=1.1,rep_pen_range=128,seed=-1,stop_sequence=[],stream_sse=False):
|
||||
def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], stream_sse=False):
|
||||
inputs = generation_inputs()
|
||||
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
|
||||
inputs.prompt = prompt.encode("UTF-8")
|
||||
|
@ -210,8 +229,19 @@ def generate(prompt,max_length=20, max_context_length=512,temperature=0.8,top_k=
|
|||
inputs.mirostat = int(args.usemirostat[0])
|
||||
inputs.mirostat_tau = float(args.usemirostat[1])
|
||||
inputs.mirostat_eta = float(args.usemirostat[2])
|
||||
elif mirostat in (1, 2):
|
||||
inputs.mirostat = mirostat
|
||||
inputs.mirostat_tau = mirostat_tau
|
||||
inputs.mirostat_eta = mirostat_eta
|
||||
else:
|
||||
inputs.mirostat = inputs.mirostat_tau = inputs.mirostat_eta = 0
|
||||
if sampler_order and 0 < len(sampler_order) <= sampler_order_max:
|
||||
try:
|
||||
for i, sampler in enumerate(sampler_order):
|
||||
inputs.sampler_order[i] = sampler
|
||||
inputs.sampler_len = len(sampler_order)
|
||||
except TypeError as e:
|
||||
print("ERROR: sampler_order must be a list of integers: " + str(e))
|
||||
inputs.seed = seed
|
||||
for n in range(stop_token_max):
|
||||
if not stop_sequence or n >= len(stop_sequence):
|
||||
|
@ -241,7 +271,7 @@ maxhordectx = 1024
|
|||
maxhordelen = 256
|
||||
modelbusy = False
|
||||
defaultport = 5001
|
||||
KcppVersion = "1.34"
|
||||
KcppVersion = "1.34.2"
|
||||
showdebug = True
|
||||
|
||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||
|
@ -277,6 +307,10 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
tfs=genparams.get('tfs', 1.0),
|
||||
rep_pen=genparams.get('rep_pen', 1.1),
|
||||
rep_pen_range=genparams.get('rep_pen_range', 128),
|
||||
mirostat=genparams.get('mirostat', 0),
|
||||
mirostat_tau=genparams.get('mirostat_tau', 5.0),
|
||||
mirostat_eta=genparams.get('mirostat_eta', 0.1),
|
||||
sampler_order=genparams.get('sampler_order', [6,0,1,3,4,2,5]),
|
||||
seed=genparams.get('sampler_seed', -1),
|
||||
stop_sequence=genparams.get('stop_sequence', []),
|
||||
stream_sse=stream_flag)
|
||||
|
@ -293,6 +327,10 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
tfs=genparams.get('tfs', 1.0),
|
||||
rep_pen=genparams.get('rep_pen', 1.1),
|
||||
rep_pen_range=genparams.get('rep_pen_range', 128),
|
||||
mirostat=genparams.get('mirostat', 0),
|
||||
mirostat_tau=genparams.get('mirostat_tau', 5.0),
|
||||
mirostat_eta=genparams.get('mirostat_eta', 0.1),
|
||||
sampler_order=genparams.get('sampler_order', [6,0,1,3,4,2,5]),
|
||||
seed=genparams.get('sampler_seed', -1),
|
||||
stop_sequence=genparams.get('stop_sequence', []),
|
||||
stream_sse=stream_flag)
|
||||
|
@ -569,10 +607,413 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
|
|||
threadArr[i].stop()
|
||||
sys.exit(0)
|
||||
|
||||
# note: customtkinter-5.2.0
|
||||
def show_new_gui():
|
||||
import customtkinter as ctk
|
||||
from tkinter.filedialog import askopenfilename
|
||||
from tkinter.filedialog import asksaveasfile
|
||||
|
||||
def show_gui():
|
||||
# if args received, launch
|
||||
if len(sys.argv) != 1:
|
||||
root = ctk.CTk()
|
||||
#we dont want the useless window to be visible, but we want it in taskbar
|
||||
root.attributes("-alpha", 0)
|
||||
args.model_param = askopenfilename(title="Select ggml model .bin files")
|
||||
root.destroy()
|
||||
if not args.model_param:
|
||||
print("\nNo ggml model file was selected. Exiting.")
|
||||
time.sleep(2)
|
||||
sys.exit(2)
|
||||
return
|
||||
|
||||
nextstate = 0 #0=exit, 1=launch, 2=oldgui
|
||||
windowwidth = 520
|
||||
windowheight = 500
|
||||
ctk.set_appearance_mode("dark")
|
||||
root = ctk.CTk()
|
||||
root.geometry(str(windowwidth) + "x" + str(windowheight))
|
||||
root.title("KoboldCpp v"+KcppVersion)
|
||||
root.resizable(False,False)
|
||||
|
||||
tabs = ctk.CTkFrame(root, corner_radius = 0, width=windowwidth, height=windowheight-50)
|
||||
tabs.grid(row=0, stick="nsew")
|
||||
tabnames= ["Quick Launch", "Hardware", "Tokens", "Model", "Network"]
|
||||
navbuttons = {}
|
||||
navbuttonframe = ctk.CTkFrame(tabs, width=100, height=int(tabs.cget("height")))
|
||||
navbuttonframe.grid(row=0, column=0, padx=2,pady=2)
|
||||
navbuttonframe.grid_propagate(False)
|
||||
|
||||
tabcontentframe = ctk.CTkFrame(tabs, width=windowwidth - int(navbuttonframe.cget("width")), height=int(tabs.cget("height")))
|
||||
tabcontentframe.grid(row=0, column=1, sticky="nsew", padx=2, pady=2)
|
||||
tabcontentframe.grid_propagate(False)
|
||||
|
||||
tabcontent = {}
|
||||
|
||||
# slider data
|
||||
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024"]
|
||||
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024"]
|
||||
contextsize_text = ["512", "1024", "2048", "3072", "4096", "6144", "8192"]
|
||||
runopts = ["Use OpenBLAS","Use CLBlast", "Use CuBLAS", "Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
|
||||
|
||||
def tabbuttonaction(name):
|
||||
for t in tabcontent:
|
||||
if name == t:
|
||||
tabcontent[t].grid(row=0, column=0)
|
||||
navbuttons[t].configure(fg_color="#6f727b")
|
||||
else:
|
||||
tabcontent[t].grid_forget()
|
||||
navbuttons[t].configure(fg_color="transparent")
|
||||
|
||||
# Dynamically create tabs + buttons based on values of [tabnames]
|
||||
for idx, name in enumerate(tabnames):
|
||||
tabcontent[name] = ctk.CTkFrame(tabcontentframe, width=int(tabcontentframe.cget("width")), height=int(tabcontentframe.cget("height")), fg_color="transparent")
|
||||
tabcontent[name].grid_propagate(False)
|
||||
if idx == 0:
|
||||
tabcontent[name].grid(row=idx, sticky="nsew")
|
||||
ctk.CTkLabel(tabcontent[name], text= name, font=ctk.CTkFont(None, 14, 'bold')).grid(row=0, padx=12, pady = 5, stick='nw')
|
||||
|
||||
navbuttons[name] = ctk.CTkButton(navbuttonframe, text=name, width = 100, corner_radius=0 , command = lambda d=name:tabbuttonaction(d), hover_color="#868a94" )
|
||||
navbuttons[name].grid(row=idx)
|
||||
|
||||
tabbuttonaction(tabnames[0])
|
||||
|
||||
# helper functions
|
||||
def makecheckbox(parent, text, variable=None, row=0, column=0, command=None, onvalue=1, offvalue=0):
|
||||
temp = ctk.CTkCheckBox(parent, text=text,variable=variable, onvalue=onvalue, offvalue=offvalue)
|
||||
if command is not None and variable is not None:
|
||||
variable.trace("w", command)
|
||||
temp.grid(row=row,column=column, padx=8, pady=1, stick="nw")
|
||||
return temp
|
||||
|
||||
def makelabel(parent, text, row, column=0):
|
||||
temp = ctk.CTkLabel(parent, text=text)
|
||||
temp.grid(row=row, column=column, padx=8, pady=1, stick="nw")
|
||||
return temp
|
||||
|
||||
def makeslider(parent, label, options, var, from_ , to, row=0, width=160, height=10, set=0):
|
||||
sliderLabel = makelabel(parent, options[set], row + 1, 1)
|
||||
makelabel(parent, label, row)
|
||||
|
||||
def sliderUpdate(a,b,c):
|
||||
sliderLabel.configure(text = options[int(var.get())])
|
||||
var.trace("w", sliderUpdate)
|
||||
slider = ctk.CTkSlider(parent, from_=from_, to=to, variable = var, width = width, height=height, border_width=5,number_of_steps=len(options) - 1)
|
||||
slider.grid(row=row+1, column=0, padx = 8, stick="w")
|
||||
slider.set(set)
|
||||
return slider
|
||||
|
||||
|
||||
def makelabelentry(parent, text, var, row=0, width= 50):
|
||||
label = makelabel(parent, text, row)
|
||||
entry = ctk.CTkEntry(parent, width=width, textvariable=var) #you cannot set placeholder text for SHARED variables
|
||||
entry.grid(row=row, column=1, padx= 8, stick="nw")
|
||||
return entry, label
|
||||
|
||||
|
||||
def makefileentry(parent, text, searchtext, var, row=0, width=250):
|
||||
makelabel(parent, text, row)
|
||||
def getfilename(var, text):
|
||||
var.set(askopenfilename(title=text))
|
||||
entry = ctk.CTkEntry(parent, width, textvariable=var)
|
||||
entry.grid(row=row+1, column=0, padx=8, stick="nw")
|
||||
button = ctk.CTkButton(parent, 50, text="Browse", command= lambda a=var,b=searchtext:getfilename(a,b))
|
||||
button.grid(row=row+1, column=1, stick="nw")
|
||||
return
|
||||
|
||||
# Vars - should be in scope to be used by multiple widgets
|
||||
gpulayers_var = ctk.StringVar(value="0")
|
||||
threads_var = ctk.StringVar(value=str(default_threads))
|
||||
runopts_var = ctk.StringVar()
|
||||
gpu_choice_var = ctk.StringVar(value="1")
|
||||
|
||||
launchbrowser = ctk.IntVar(value=1)
|
||||
highpriority = ctk.IntVar()
|
||||
disablemmap = ctk.IntVar()
|
||||
psutil = ctk.IntVar()
|
||||
usemlock = ctk.IntVar()
|
||||
debugmode = ctk.IntVar()
|
||||
|
||||
lowvram_var = ctk.IntVar()
|
||||
|
||||
blas_threads_var = ctk.StringVar()
|
||||
blas_size_var = ctk.IntVar()
|
||||
version_var =ctk.StringVar(value="0")
|
||||
|
||||
stream = ctk.IntVar()
|
||||
smartcontext = ctk.IntVar()
|
||||
unbantokens = ctk.IntVar()
|
||||
usemirostat = ctk.IntVar()
|
||||
mirostat_var = ctk.StringVar(value="2")
|
||||
mirostat_tau = ctk.StringVar(value="5.0")
|
||||
mirostat_eta = ctk.StringVar(value="0.1")
|
||||
|
||||
context_var = ctk.IntVar()
|
||||
|
||||
model_var = ctk.StringVar()
|
||||
lora_var = ctk.StringVar()
|
||||
lora_base_var = ctk.StringVar()
|
||||
|
||||
port_var = ctk.StringVar(value=defaultport)
|
||||
host_var = ctk.StringVar(value="")
|
||||
horde_name_var = ctk.StringVar(value="koboldcpp")
|
||||
horde_gen_var = ctk.StringVar(value=maxhordelen)
|
||||
horde_context_var = ctk.StringVar(value=maxhordectx)
|
||||
usehorde_var = ctk.IntVar()
|
||||
|
||||
# Quick Launch Tab
|
||||
quick_tab = tabcontent["Quick Launch"]
|
||||
|
||||
# gpu options
|
||||
quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 4, 50)
|
||||
quick_gpu_selector_label = makelabel(quick_tab, "GPU ID:", 3)
|
||||
quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
|
||||
quick_lowvram_box = makecheckbox(quick_tab, "Low VRAM", lowvram_var, 5)
|
||||
|
||||
# hides gpu options when CLBlast is not chosen
|
||||
def changerunmode(a,b,c):
|
||||
index = runopts_var.get()
|
||||
if index == "Use CLBlast" or index == "Use CuBLAS":
|
||||
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||
else:
|
||||
gpu_selector_label.grid_forget()
|
||||
gpu_selector_box.grid_forget()
|
||||
quick_gpu_selector_label.grid_forget()
|
||||
quick_gpu_selector_box.grid_forget()
|
||||
|
||||
if index == "Use CuBLAS":
|
||||
lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
|
||||
quick_lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
|
||||
else:
|
||||
lowvram_box.grid_forget()
|
||||
quick_lowvram_box.grid_forget()
|
||||
|
||||
if index == "Use CLBlast" or index == "Use CuBLAS":
|
||||
gpu_layers_label.grid(row=5, column=0, padx = 8, pady=1, stick="nw")
|
||||
gpu_layers_entry.grid(row=5, column=1, padx=8, pady=1, stick="nw")
|
||||
quick_gpu_layers_label.grid(row=5, column=0, padx = 8, pady=1, stick="nw")
|
||||
quick_gpu_layers_entry.grid(row=5, column=1, padx=8, pady=1, stick="nw")
|
||||
else:
|
||||
gpu_layers_label.grid_forget()
|
||||
gpu_layers_entry.grid_forget()
|
||||
quick_gpu_layers_label.grid_forget()
|
||||
quick_gpu_layers_entry.grid_forget()
|
||||
|
||||
# presets selector
|
||||
makelabel(quick_tab, "Presets:", 1)
|
||||
|
||||
runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=180,variable=runopts_var, state="readonly")
|
||||
runoptbox.grid(row=1, column=1,padx=8, stick="nw")
|
||||
runoptbox.set("Use OpenBLAS")
|
||||
|
||||
# threads
|
||||
makelabelentry(quick_tab, "Threads:" , threads_var, 8, 50)
|
||||
|
||||
# blas batch size
|
||||
makeslider(quick_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, 6, 12, set=5)
|
||||
|
||||
# quick boxes
|
||||
quick_boxes = {"Launch Browser": launchbrowser , "High Priority" : highpriority, "Streaming Mode":stream, "Use SmartContext":smartcontext, "Unban Tokens":unbantokens, "Disable MMAP":disablemmap,}
|
||||
for idx, name, in enumerate(quick_boxes):
|
||||
makecheckbox(quick_tab, name, quick_boxes[name], int(idx/2) +20, idx%2)
|
||||
|
||||
# context size
|
||||
makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, set=2)
|
||||
|
||||
# load model
|
||||
makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170)
|
||||
|
||||
# Hardware Tab
|
||||
hardware_tab = tabcontent["Hardware"]
|
||||
|
||||
# gpu options
|
||||
gpu_layers_entry,gpu_layers_label = makelabelentry(hardware_tab,"GPU Layers:", gpulayers_var, 4, 50)
|
||||
gpu_selector_label = makelabel(hardware_tab, "GPU ID:", 3)
|
||||
gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
|
||||
lowvram_box = makecheckbox(hardware_tab, "Low VRAM", lowvram_var, 5)
|
||||
|
||||
# presets selector
|
||||
makelabel(hardware_tab, "Presets:", 1)
|
||||
runoptbox = ctk.CTkComboBox(hardware_tab, values=runopts, width=180,variable=runopts_var, state="readonly")
|
||||
runoptbox.grid(row=1, column=1,padx=8, stick="nw")
|
||||
runoptbox.set("Use OpenBLAS")
|
||||
runopts_var.trace('w', changerunmode)
|
||||
changerunmode(1,1,1)
|
||||
# threads
|
||||
makelabelentry(hardware_tab, "Threads:" , threads_var, 8, 50)
|
||||
|
||||
# hardware checkboxes
|
||||
hardware_boxes = {"Launch Browser": launchbrowser , "High Priority" : highpriority, "Disable MMAP":disablemmap, "Use mlock":usemlock, "PSUtil Set Threads":psutil, "Debug Mode":debugmode,}
|
||||
|
||||
for idx, name, in enumerate(hardware_boxes):
|
||||
makecheckbox(hardware_tab, name, hardware_boxes[name], int(idx/2) +30, idx%2)
|
||||
|
||||
# blas thread specifier
|
||||
makelabelentry(hardware_tab, "BLAS threads:" , blas_threads_var, 11, 50)
|
||||
# blas batch size
|
||||
makeslider(hardware_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, 6, 12, set=5)
|
||||
# force version
|
||||
makelabelentry(hardware_tab, "Force Version:" , version_var, 100, 50)
|
||||
|
||||
# Tokens Tab
|
||||
tokens_tab = tabcontent["Tokens"]
|
||||
# tokens checkboxes
|
||||
token_boxes = {"Streaming Mode":stream, "Use SmartContext":smartcontext, "Unban Tokens":unbantokens}
|
||||
for idx, name, in enumerate(token_boxes):
|
||||
makecheckbox(tokens_tab, name, token_boxes[name], idx + 1)
|
||||
|
||||
mirostat_entry, mirostate_label = makelabelentry(tokens_tab, "Mirostat:", mirostat_var)
|
||||
mirostat_tau_entry, mirostat_tau_label = makelabelentry(tokens_tab, "Mirostat Tau:", mirostat_tau)
|
||||
mirostat_eta_entry, mirostat_eta_label = makelabelentry(tokens_tab, "Mirostat Eta:", mirostat_eta)
|
||||
def togglemiro(a,b,c):
|
||||
items = [mirostate_label, mirostat_entry, mirostat_tau_label, mirostat_tau_entry, mirostat_eta_label, mirostat_eta_entry]
|
||||
for idx, item in enumerate(items):
|
||||
if usemirostat.get() == 1:
|
||||
item.grid(row=11 + int(idx/2), column=idx%2, padx=8, stick="nw")
|
||||
else:
|
||||
item.grid_forget()
|
||||
|
||||
|
||||
makecheckbox(tokens_tab, "Use Mirostat", row=10, variable=usemirostat, command=togglemiro)
|
||||
togglemiro(1,1,1)
|
||||
|
||||
# context size
|
||||
makeslider(tokens_tab, "Context Size:",contextsize_text, context_var, 0, 4, 20, set=2)
|
||||
|
||||
# Model Tab
|
||||
model_tab = tabcontent["Model"]
|
||||
|
||||
makefileentry(model_tab, "Model:", "Select GGML Model File", model_var, 1)
|
||||
makefileentry(model_tab, "Lora:", "Select Lora File",lora_var, 3)
|
||||
makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5)
|
||||
|
||||
# Network Tab
|
||||
network_tab = tabcontent["Network"]
|
||||
|
||||
# interfaces
|
||||
makelabelentry(network_tab, "Port: ", port_var, 1, 150)
|
||||
makelabelentry(network_tab, "Host: ", host_var, 2, 150)
|
||||
|
||||
# horde
|
||||
makelabel(network_tab, "Horde:", 3).grid(pady=10)
|
||||
|
||||
horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Name:", horde_name_var, 5, 200)
|
||||
horde_gen_entry, horde_gen_label = makelabelentry(network_tab, "Gen. Length:", horde_gen_var, 6, 50)
|
||||
horde_context_entry, horde_context_label = makelabelentry(network_tab, "Max Context:",horde_context_var, 7, 50)
|
||||
|
||||
def togglehorde(a,b,c):
|
||||
labels = [horde_name_label, horde_gen_label, horde_context_label]
|
||||
for idx, item in enumerate([horde_name_entry, horde_gen_entry, horde_context_entry]):
|
||||
if usehorde_var.get() == 1:
|
||||
item.grid(row=5 + idx, column = 1, padx=8, pady=1, stick="nw")
|
||||
labels[idx].grid(row=5 + idx, padx=8, pady=1, stick="nw")
|
||||
else:
|
||||
item.grid_forget()
|
||||
labels[idx].grid_forget()
|
||||
|
||||
usehorde_box = makecheckbox(network_tab, "Configure for Horde", usehorde_var, 4, command=togglehorde)
|
||||
togglehorde(1,1,1)
|
||||
|
||||
# launch
|
||||
def guilaunch():
|
||||
if model_var.get() == "":
|
||||
tmp = askopenfilename(title="Select ggml model .bin files")
|
||||
model_var.set(tmp)
|
||||
nonlocal nextstate
|
||||
nextstate = 1
|
||||
root.destroy()
|
||||
pass
|
||||
|
||||
def switch_old_gui():
|
||||
nonlocal nextstate
|
||||
nextstate = 2
|
||||
root.destroy()
|
||||
pass
|
||||
|
||||
ctk.CTkButton(tabs , text = "Launch", fg_color="#2f8d3c", command = guilaunch, width=80, height = 35 ).grid(row=1,column=1, stick="se", padx= 25, pady=5)
|
||||
|
||||
# ctk.CTkButton(tabs , text = "Save", fg_color="#084a66", command = save_config, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 5, pady=5)
|
||||
# ctk.CTkButton(tabs , text = "Load", fg_color="#084a66", command = load_config, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 70, pady=5)
|
||||
|
||||
ctk.CTkButton(tabs , text = "Old GUI", fg_color="#084a66", command = switch_old_gui, width=100, height = 35 ).grid(row=1,column=0, stick="sw", padx= 5, pady=5)
|
||||
# runs main loop until closed or launch clicked
|
||||
root.mainloop()
|
||||
|
||||
if nextstate==0:
|
||||
print("Exiting by user request.")
|
||||
time.sleep(2)
|
||||
sys.exit()
|
||||
elif nextstate==2:
|
||||
time.sleep(0.1)
|
||||
show_old_gui()
|
||||
else:
|
||||
# processing vars
|
||||
args.threads = int(threads_var.get())
|
||||
|
||||
args.usemlock = usemlock.get() == 1
|
||||
args.debugmode = debugmode.get() == 1
|
||||
args.launch = launchbrowser.get()==1
|
||||
args.highpriority = highpriority.get()==1
|
||||
args.nommap = disablemmap.get()==1
|
||||
args.psutil_set_threads = psutil.get()==1
|
||||
args.stream = stream.get()==1
|
||||
args.smartcontext = smartcontext.get()==1
|
||||
args.unbantokens = unbantokens.get()==1
|
||||
|
||||
gpuchoiceidx = int(gpu_choice_var.get())-1
|
||||
if runopts_var.get() == runopts[1]:
|
||||
args.useclblast = [[0,0], [1,0], [0,1]][gpuchoiceidx]
|
||||
if runopts_var.get() == runopts[2]:
|
||||
args.usecublas = ["lowvram",str(gpuchoiceidx)] if lowvram_var.get() == 1 else ["normal",str(gpuchoiceidx)]
|
||||
if gpulayers_var.get():
|
||||
args.gpulayers = int(gpulayers_var.get())
|
||||
if runopts_var.get()==runopts[3]:
|
||||
args.noblas = True
|
||||
if runopts_var.get()==runopts[4]:
|
||||
args.noavx2 = True
|
||||
if runopts_var.get()==runopts[5]:
|
||||
args.noavx2 = True
|
||||
args.noblas = True
|
||||
args.nommap = True
|
||||
print("[Failsafe Mode : mmap is disabled.]")
|
||||
|
||||
|
||||
|
||||
args.blasthreads = None if blas_threads_var.get()=="" else int(blas_threads_var.get())
|
||||
|
||||
args.blasbatchsize = int(blasbatchsize_values[int(blas_size_var.get())])
|
||||
args.forceversion = 0 if version_var.get()=="" else int(version_var.get())
|
||||
|
||||
args.mirostat = [int(mirostat_var.get()), float(mirostat_tau.get()), float(mirostat_eta.get())] if usemirostat.get()==1 else None
|
||||
args.contextsize = int(contextsize_text[context_var.get()])
|
||||
|
||||
args.model_param = None if model_var.get() == "" else model_var.get()
|
||||
args.lora = None if lora_var.get() == "" else ([lora_var.get()] if lora_base_var.get()=="" else [lora_var.get(), lora_base_var.get()])
|
||||
|
||||
args.port_param = defaultport if port_var.get()=="" else int(port_var.get())
|
||||
args.host = host_var.get()
|
||||
|
||||
args.hordeconfig = None if usehorde_var.get() == 0 else [horde_name_var.get(), horde_gen_var.get(), horde_context_var.get()]
|
||||
|
||||
if not args.model_param:
|
||||
print("\nNo ggml model file was selected. Exiting.")
|
||||
time.sleep(2)
|
||||
sys.exit(2)
|
||||
|
||||
def show_gui_warning():
|
||||
from tkinter import messagebox
|
||||
import tkinter as tk
|
||||
root = tk.Tk()
|
||||
root.attributes("-alpha", 0)
|
||||
messagebox.showerror(title="New GUI failed, using Old GUI", message="The new GUI failed to load.\n\nTo use new GUI, please install the customtkinter python module.")
|
||||
root.destroy()
|
||||
|
||||
def show_old_gui():
|
||||
import tkinter as tk
|
||||
from tkinter.filedialog import askopenfilename
|
||||
from tkinter import messagebox
|
||||
|
||||
if len(sys.argv) == 1:
|
||||
#no args passed at all. Show nooby gui
|
||||
|
@ -649,8 +1090,8 @@ def show_gui():
|
|||
frameD.grid(row=5,column=0,pady=4)
|
||||
|
||||
# Create button, it will change label text
|
||||
tk.Button( root , text = "Launch", font = ("Impact", 18), bg='#54FA9B', command = guilaunch ).grid(row=6,column=0)
|
||||
tk.Label(root, text = "(Please use the Command Line for more advanced options)",
|
||||
tk.Button(root , text = "Launch", font = ("Impact", 18), bg='#54FA9B', command = guilaunch ).grid(row=6,column=0)
|
||||
tk.Label(root, text = "(Please use the Command Line for more advanced options)\nThis GUI is deprecated. Please install customtkinter.",
|
||||
font = ("Arial", 9)).grid(row=7,column=0)
|
||||
|
||||
root.mainloop()
|
||||
|
@ -680,7 +1121,7 @@ def show_gui():
|
|||
if selrunchoice==runopts[3]:
|
||||
args.useclblast = [0,1]
|
||||
if selrunchoice==runopts[4]:
|
||||
args.usecublas = True
|
||||
args.usecublas = ["normal"]
|
||||
if selrunchoice==runopts[5]:
|
||||
args.noblas = True
|
||||
if selrunchoice==runopts[6]:
|
||||
|
@ -733,14 +1174,22 @@ def main(args):
|
|||
if not args.model_param:
|
||||
#give them a chance to pick a file
|
||||
print("For command line arguments, please refer to --help")
|
||||
print("Otherwise, please manually select ggml file:")
|
||||
print("***")
|
||||
try:
|
||||
show_gui()
|
||||
show_new_gui()
|
||||
except Exception as ex:
|
||||
print("File selection GUI unsupported. Please check command line: script.py --help")
|
||||
print("Reason for no GUI: " + str(ex))
|
||||
time.sleep(2)
|
||||
sys.exit(2)
|
||||
print("Failed to use new GUI. Reason: " + str(ex))
|
||||
print("Make sure customtkinter is installed!!!")
|
||||
print("Attempting to use old GUI...")
|
||||
if not args.model_param:
|
||||
try:
|
||||
show_gui_warning()
|
||||
show_old_gui()
|
||||
except Exception as ex2:
|
||||
print("File selection GUI unsupported. Please check command line: script.py --help")
|
||||
print("Reason for no GUI: " + str(ex2))
|
||||
time.sleep(2)
|
||||
sys.exit(2)
|
||||
|
||||
if args.hordeconfig and args.hordeconfig[0]!="":
|
||||
global friendlymodelname, maxhordelen, maxhordectx, showdebug
|
||||
|
@ -810,7 +1259,8 @@ def main(args):
|
|||
args.blasthreads = args.threads
|
||||
|
||||
modelname = os.path.abspath(args.model_param)
|
||||
print(f"Loading model: {modelname} \n[Threads: {args.threads}, BlasThreads: {args.blasthreads}, SmartContext: {args.smartcontext}]")
|
||||
print(args)
|
||||
print(f"==========\nLoading model: {modelname} \n[Threads: {args.threads}, BlasThreads: {args.blasthreads}, SmartContext: {args.smartcontext}]")
|
||||
loadok = load_model(modelname)
|
||||
print("Load Model OK: " + str(loadok))
|
||||
|
||||
|
@ -845,7 +1295,7 @@ def main(args):
|
|||
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
|
||||
|
||||
if __name__ == '__main__':
|
||||
print("Welcome to KoboldCpp - Version " + KcppVersion) # just update version manually
|
||||
print("***\nWelcome to KoboldCpp - Version " + KcppVersion) # just update version manually
|
||||
# print("Python version: " + sys.version)
|
||||
parser = argparse.ArgumentParser(description='KoboldCpp Server')
|
||||
modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
|
||||
|
@ -869,7 +1319,8 @@ if __name__ == '__main__':
|
|||
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024], default=512)
|
||||
parser.add_argument("--stream", help="Uses streaming when generating tokens. Only for the Kobold Lite UI.", action='store_true')
|
||||
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
|
||||
parser.add_argument("--unbantokens", help="Normally, KoboldAI prevents certain tokens such as EOS and Square Brackets. This flag unbans them.", action='store_true')
|
||||
parser.add_argument("--unbantokens", help="Normally, KoboldAI prevents the EOS token from being generated. This flag unbans it.", action='store_true')
|
||||
parser.add_argument("--bantokens", help="You can manually specify a list of token SUBSTRINGS that the AI cannot use. This bans ALL instances of that substring.", metavar=('[token_substrings]'), nargs='+')
|
||||
parser.add_argument("--usemirostat", help="Experimental! Replaces your samplers with mirostat. Takes 3 params = [type(0/1/2), tau(5.0), eta(0.1)].",metavar=('[type]', '[tau]', '[eta]'), type=float, nargs=3)
|
||||
parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0)
|
||||
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
|
||||
|
@ -881,7 +1332,7 @@ if __name__ == '__main__':
|
|||
compatgroup = parser.add_mutually_exclusive_group()
|
||||
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
|
||||
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
|
||||
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires Nvidia GPU. Select lowvram to not allocate VRAM scratch buffer.", default='', const='normal', nargs='?', choices=['normal', 'lowvram'])
|
||||
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires Nvidia GPU. Select lowvram to not allocate VRAM scratch buffer. Enter a number after to select a different main GPU.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2'])
|
||||
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue