mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-10 09:04:36 +00:00

added support for server side save slots

parent 5ee7cbe08c
commit ccd2dbe020
3 changed files with 705 additions and 96 deletions
koboldcpp.py (148 changed lines)
@@ -40,6 +40,7 @@ logprobs_max = 5
 default_draft_amount = 8
 default_ttsmaxlen = 4096
 default_visionmaxres = 1024
+net_save_slots = 8
 
 # abuse prevention
 stop_token_max = 256
@@ -48,7 +49,7 @@ logit_bias_max = 512
 dry_seq_break_max = 128
 
 # global vars
-KcppVersion = "1.84.2"
+KcppVersion = "1.85"
 showdebug = True
 kcpp_instance = None #global running instance
 global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False}
@@ -86,6 +87,7 @@ runmode_untouched = True
 modelfile_extracted_meta = None
 importvars_in_progress = False
 has_multiplayer = False
+savedata_obj = None
 multiplayer_story_data_compressed = None #stores the full compressed story of the current multiplayer session
 multiplayer_turn_major = 1 # to keep track of when a client needs to sync their stories
 multiplayer_turn_minor = 1
@@ -676,7 +678,7 @@ def string_contains_or_overlaps_sequence_substring(inputstr, sequences):
     return False
 
 def get_capabilities():
-    global has_multiplayer, KcppVersion, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
+    global savedata_obj, has_multiplayer, KcppVersion, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath
     has_llm = not (friendlymodelname=="inactive")
     has_txt2img = not (friendlysdmodelname=="inactive" or fullsdmodelpath=="")
     has_vision = (mmprojpath!="")
@@ -685,7 +687,7 @@ def get_capabilities():
     has_search = True if args.websearch else False
     has_tts = (ttsmodelpath!="")
     admin_type = (2 if args.admin and args.admindir and args.adminpassword else (1 if args.admin and args.admindir else 0))
-    return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "admin": admin_type}
+    return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "savedata":(savedata_obj is not None), "admin": admin_type}
 
 def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo
     chunk_size = 1024*1024*12 # read first 12mb of file
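With the new "savedata" field, a frontend can tell whether this server has a SaveDataFile configured before offering remote save slots. A minimal client-side sketch follows (illustration only, not part of this commit); it assumes the capabilities dict above is what the server reports from its version/capabilities endpoint and that the server is reachable locally on the default port.

import requests  # third-party HTTP client, used here for brevity

# Endpoint and port are assumptions; point this at wherever get_capabilities() is served.
caps = requests.get("http://localhost:5001/api/extra/version", timeout=10).json()

if caps.get("savedata"):
    print("Remote save slots are available on this server.")
else:
    print("No SaveDataFile configured; remote save slots are unavailable.")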
@@ -2362,7 +2364,7 @@ Enter Prompt:<br>
     def do_GET(self):
         global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
         global last_req_time, start_time
-        global has_multiplayer, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, maxctx, maxhordelen, friendlymodelname, lastgeneratedcomfyimg, KcppVersion, totalgens, preloaded_story, exitcounter, currentusergenkey, friendlysdmodelname, fullsdmodelpath, mmprojpath, password
+        global savedata_obj, has_multiplayer, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, maxctx, maxhordelen, friendlymodelname, lastgeneratedcomfyimg, KcppVersion, totalgens, preloaded_story, exitcounter, currentusergenkey, friendlysdmodelname, fullsdmodelpath, mmprojpath, password
         self.path = self.path.rstrip('/')
         response_body = None
         content_type = 'application/json'
@@ -2549,7 +2551,7 @@ Enter Prompt:<br>
         return
 
     def do_POST(self):
-        global modelbusy, requestsinqueue, currentusergenkey, totalgens, pendingabortkey, lastgeneratedcomfyimg, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive
+        global modelbusy, requestsinqueue, currentusergenkey, totalgens, pendingabortkey, lastgeneratedcomfyimg, multiplayer_turn_major, multiplayer_turn_minor, multiplayer_story_data_compressed, multiplayer_dataformat, multiplayer_lastactive, net_save_slots
         contlenstr = self.headers['content-length']
         content_length = 0
         body = None
@@ -2707,6 +2709,84 @@ Enter Prompt:<br>
                 multiplayer_lastactive[sender] = int(time.time())
                 response_body = (json.dumps({"turn_major":multiplayer_turn_major,"turn_minor":multiplayer_turn_minor,"idle":self.get_multiplayer_idle_state(sender),"data_format":multiplayer_dataformat}).encode())
 
+        elif self.path.endswith('/api/extra/data/list'):
+            if not self.secure_endpoint():
+                return
+            if savedata_obj is None:
+                response_body = (json.dumps([]).encode())
+                return
+            output = []
+            for i in range (net_save_slots):
+                if str(i) in savedata_obj:
+                    output.append(savedata_obj[str(i)]["title"])
+                else:
+                    output.append("")
+            response_body = (json.dumps(output).encode())
+
+        elif self.path.endswith('/api/extra/data/load'):
+            if not self.secure_endpoint():
+                return
+            if savedata_obj is None:
+                response_body = (json.dumps({"success":False,"data":None}).encode())
+            loadid = -1
+            try:
+                tempbody = json.loads(body)
+                loadid = tryparseint(tempbody.get('slot', 0))
+            except Exception:
+                loadid = -1
+            if loadid < 0 or str(loadid) not in savedata_obj:
+                response_body = (json.dumps({"success":False,"data":None}).encode())
+            else:
+                response_body = (json.dumps({"success":True,"data":savedata_obj[str(loadid)]}).encode())
+
+        elif self.path.endswith('/api/extra/data/save'):
+            if not self.secure_endpoint():
+                return
+            if savedata_obj is None:
+                response_code = 400
+                response_body = (json.dumps({"success":False, "error":"SaveDataFile not enabled!"}).encode())
+            else:
+                try:
+                    incoming_story = json.loads(body) # ensure submitted data is valid json
+                    slotid = tryparseint(incoming_story.get('slot', -1))
+                    dataformat = incoming_story.get('format', "")
+                    title = incoming_story.get('title', "")
+                    if not title or title=="":
+                        title = "Untitled Save"
+                    storybody = incoming_story.get('data', None) #should be a compressed string
+                    if slotid >= 0 and slotid < net_save_slots: # we shall provide 4 network save slots
+                        saveneeded = False
+                        if storybody and storybody!="":
+                            storybody = str(storybody)
+                            if len(storybody) > (1024*1024*8): #limit story to 8mb
+                                response_code = 400
+                                response_body = (json.dumps({"success":False, "error":"Story is too long!"}).encode())
+                            else:
+                                savedata_obj[str(slotid)] = {"title":title, "format":dataformat, "data":storybody}
+                                saveneeded = True
+                        else: #erasing existing story
+                            if str(slotid) in savedata_obj:
+                                savedata_obj.pop(str(slotid))
+                                saveneeded = True
+                        if saveneeded:
+                            if args.savedatafile and os.path.exists(args.savedatafile):
+                                with open(args.savedatafile, 'w+', encoding='utf-8', errors='ignore') as f:
+                                    json.dump(savedata_obj, f)
+                                print(f"Data was saved to slot {slotid}")
+                                response_body = (json.dumps({"success":True, "error":""}).encode())
+                            else:
+                                response_code = 400
+                                response_body = (json.dumps({"success":False, "error":"SaveDataFile is missing!"}).encode())
+                        else:
+                            response_body = (json.dumps({"success":True, "error":""}).encode())
+                    else:
+                        response_code = 400
+                        response_body = (json.dumps({"success":False, "error":"No story submitted or invalid slot!"}).encode())
+                except Exception as e:
+                    utfprint("Remote Save Story - Body Error: " + str(e))
+                    response_code = 400
+                    response_body = (json.dumps({"success": False, "error":"Submitted story invalid!"}).encode())
+
         elif self.path.endswith('/api/extra/multiplayer/getstory'):
             if not self.secure_endpoint():
                 return
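The three endpoints above exchange plain JSON, so exercising them needs nothing beyond an HTTP client. A rough client-side sketch (not part of this commit), assuming a local server on the default port with no --password set; the base URL and the "data" payload are placeholders, since a real frontend would submit its own compressed story string.

import requests  # third-party HTTP client, used for illustration

BASE = "http://localhost:5001"  # assumed server address

# List the titles of all network save slots; empty strings mark unused slots.
titles = requests.post(f"{BASE}/api/extra/data/list", json={}).json()
print("Slots:", titles)

# Save into slot 0. "data" should be the frontend's compressed story string
# (rejected above 8 MB); "format" and "title" here are free-form placeholders.
payload = {"slot": 0, "title": "My Story", "format": "placeholder", "data": "PLACEHOLDER_STORY"}
print(requests.post(f"{BASE}/api/extra/data/save", json=payload).json())

# Load slot 0 back; the response returns the stored title, format and data.
print(requests.post(f"{BASE}/api/extra/data/load", json={"slot": 0}).json())

# Submitting an empty "data" value erases whatever is stored in that slot.
print(requests.post(f"{BASE}/api/extra/data/save", json={"slot": 0, "data": ""}).json())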
@@ -3097,7 +3177,7 @@ def show_gui():
     global using_gui_launcher
     using_gui_launcher = True
     from tkinter.filedialog import askopenfilename, askdirectory
-    from tkinter.filedialog import asksaveasfile
+    from tkinter.filedialog import asksaveasfilename
 
     # if args received, launch
     if len(sys.argv) != 1 and not args.showgui:
@@ -3214,7 +3294,7 @@ def show_gui():
 
     tabs = ctk.CTkFrame(root, corner_radius = 0, width=windowwidth, height=windowheight-50)
     tabs.grid(row=0, stick="nsew")
-    tabnames= ["Quick Launch", "Hardware", "Tokens", "Model Files", "Network", "Horde Worker","Image Gen","Audio","Admin","Extra"]
+    tabnames= ["Quick Launch", "Hardware", "Tokens", "Loaded Files", "Network", "Horde Worker","Image Gen","Audio","Admin","Extra"]
     navbuttons = {}
     navbuttonframe = ctk.CTkFrame(tabs, width=100, height=int(tabs.cget("height")))
     navbuttonframe.grid(row=0, column=0, padx=2,pady=2)
@@ -3276,6 +3356,7 @@ def show_gui():
     lora_var = ctk.StringVar()
     lora_base_var = ctk.StringVar()
     preloadstory_var = ctk.StringVar()
+    savedatafile_var = ctk.StringVar()
     mmproj_var = ctk.StringVar()
     visionmaxres_var = ctk.StringVar(value=str(default_visionmaxres))
     draftmodel_var = ctk.StringVar()
@@ -3385,14 +3466,22 @@ def show_gui():
         entry.grid(row=row, column=(0 if singleline else 1), padx=padx, sticky="nw")
         return entry, label
 
-    def makefileentry(parent, text, searchtext, var, row=0, width=200, filetypes=[], onchoosefile=None, singlerow=False, singlecol=True, is_dir=False, tooltiptxt=""):
+    #file dialog types: 0=openfile,1=savefile,2=opendir
+    def makefileentry(parent, text, searchtext, var, row=0, width=200, filetypes=[], onchoosefile=None, singlerow=False, singlecol=True, dialog_type=0, tooltiptxt=""):
         label = makelabel(parent, text, row,0,tooltiptxt,columnspan=3)
         def getfilename(var, text):
             initialDir = os.path.dirname(var.get())
             initialDir = initialDir if os.path.isdir(initialDir) else None
             fnam = None
-            if is_dir:
+            if dialog_type==2:
                 fnam = askdirectory(title=text, mustexist=True, initialdir=initialDir)
+            elif dialog_type==1:
+                fnam = asksaveasfilename(title=text, filetypes=filetypes, defaultextension=filetypes, initialdir=initialDir)
+                if not fnam:
+                    fnam = ""
+                else:
+                    fnam = str(fnam).strip()
+                    fnam = f"{fnam}.jsondb" if ".jsondb" not in fnam.lower() else fnam
             else:
                 fnam = askopenfilename(title=text,filetypes=filetypes, initialdir=initialDir)
             if fnam:
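The makefileentry helper now distinguishes three dialog kinds instead of a single is_dir flag. The mapping corresponds to three standard tkinter.filedialog helpers; a standalone sketch of the same idea outside the GUI code (the pick_path name is hypothetical):

from tkinter.filedialog import askopenfilename, asksaveasfilename, askdirectory

def pick_path(dialog_type=0, title="Select", filetypes=None):
    # 0 = open an existing file, 1 = choose a name to save to, 2 = pick a directory
    filetypes = filetypes or []
    if dialog_type == 2:
        return askdirectory(title=title, mustexist=True)
    elif dialog_type == 1:
        return asksaveasfilename(title=title, filetypes=filetypes)  # "" if cancelled
    return askopenfilename(title=title, filetypes=filetypes)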
@@ -3777,7 +3866,7 @@ def show_gui():
     togglectxshift(1,1,1)
 
     # Model Tab
-    model_tab = tabcontent["Model Files"]
+    model_tab = tabcontent["Loaded Files"]
 
     makefileentry(model_tab, "Text Model:", "Select GGUF or GGML Model File", model_var, 1,width=280,singlerow=True, onchoosefile=on_picked_model_file,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
     makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=280,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
@@ -3789,6 +3878,7 @@ def show_gui():
     makelabelentry(model_tab, "Splits: ", draftgpusplit_str_vars, 13, 50,padx=210,singleline=True,tooltip="Distribution of draft model layers. Leave blank to follow main model's gpu split. Only works if multi-gpu (All) selected in main model.", labelpadx=160)
     makelabelentry(model_tab, "Layers: ", draftgpulayers_var, 13, 50,padx=320,singleline=True,tooltip="How many layers to GPU offload for the draft model", labelpadx=270)
     makefileentry(model_tab, "Preload Story:", "Select Preloaded Story File", preloadstory_var, 15,width=280,singlerow=True,tooltiptxt="Select an optional KoboldAI JSON savefile \nto be served on launch to any client.")
+    makefileentry(model_tab, "SaveData File:", "Select or Create New SaveData Database File", savedatafile_var, 17,width=280,filetypes=[("KoboldCpp SaveDB", "*.jsondb")],singlerow=True,dialog_type=1,tooltiptxt="Selecting a file will allow data to be loaded and saved persistently to this KoboldCpp server remotely. File is created if it does not exist.")
     makefileentry(model_tab, "ChatCompletions Adapter:", "Select ChatCompletions Adapter File", chatcompletionsadapter_var, 24, width=250, filetypes=[("JSON Adapter", "*.json")], tooltiptxt="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.")
     def pickpremadetemplate():
         initialDir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'kcpp_adapters')
@@ -3909,7 +3999,7 @@ def show_gui():
     admin_tab = tabcontent["Admin"]
     makecheckbox(admin_tab, "Enable Model Administration", admin_var, 1, 0,tooltiptxt="Enable a admin server, allowing you to remotely relaunch and swap models and configs.")
     makelabelentry(admin_tab, "Admin Password:" , admin_password_var, 3, 150,padx=120,singleline=True,tooltip="Require a password to access admin functions. You are strongly advised to use one for publically accessible instances!")
-    makefileentry(admin_tab, "Config Directory:", "Select directory containing .kcpps files to relaunch from", admin_dir_var, 5, width=280, is_dir=True, tooltiptxt="Specify a directory to look for .kcpps configs in, which can be used to swap models.")
+    makefileentry(admin_tab, "Config Directory:", "Select directory containing .kcpps files to relaunch from", admin_dir_var, 5, width=280, dialog_type=2, tooltiptxt="Specify a directory to look for .kcpps configs in, which can be used to swap models.")
 
     def kcpp_export_template():
         nonlocal kcpp_exporting_template
@@ -3937,10 +4027,10 @@ def show_gui():
         savdict["draftgpusplit"] = None
         savdict["config"] = None
         savdict["ttsthreads"] = 0
-        filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
-        if filename is None:
+        filename = asksaveasfilename(filetypes=file_type, defaultextension=file_type)
+        if not filename:
             return
-        filenamestr = str(filename.name).strip()
+        filenamestr = str(filename).strip()
         filenamestr = f"{filenamestr}.kcppt" if ".kcppt" not in filenamestr.lower() else filenamestr
         file = open(filenamestr, 'a')
         file.write(json.dumps(savdict))
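The follow-up lines switch from "filename.name" and an "is None" check to a plain string and a truthiness check because the two tkinter helpers return different things: asksaveasfile opens the target for writing immediately and returns a file object (or None if the dialog is cancelled), while asksaveasfilename only returns the chosen path as a string (typically empty when cancelled). A small illustration of the difference, not part of the commit:

from tkinter.filedialog import asksaveasfile, asksaveasfilename

fobj = asksaveasfile(defaultextension=".kcppt")       # file object, or None on cancel
path = asksaveasfilename(defaultextension=".kcppt")   # path string, "" on cancel

if fobj is not None:
    print("old behaviour:", fobj.name)
    fobj.close()
if path:
    print("new behaviour:", path)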
@@ -4067,6 +4157,7 @@ def show_gui():
         args.model_param = None if model_var.get() == "" else model_var.get()
         args.lora = None if lora_var.get() == "" else ([lora_var.get()] if lora_base_var.get()=="" else [lora_var.get(), lora_base_var.get()])
         args.preloadstory = None if preloadstory_var.get() == "" else preloadstory_var.get()
+        args.savedatafile = None if savedatafile_var.get() == "" else savedatafile_var.get()
         try:
             if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
                 print("Embedding preload story...") # parse and save embedded preload story
@@ -4274,6 +4365,7 @@ def show_gui():
 
         password_var.set(dict["password"] if ("password" in dict and dict["password"]) else "")
         preloadstory_var.set(dict["preloadstory"] if ("preloadstory" in dict and dict["preloadstory"]) else "")
+        savedatafile_var.set(dict["savedatafile"] if ("savedatafile" in dict and dict["savedatafile"]) else "")
         chatcompletionsadapter_var.set(dict["chatcompletionsadapter"] if ("chatcompletionsadapter" in dict and dict["chatcompletionsadapter"]) else "")
         port_var.set(dict["port_param"] if ("port_param" in dict and dict["port_param"]) else defaultport)
         host_var.set(dict["host"] if ("host" in dict and dict["host"]) else "")
|
|||
export_vars()
|
||||
savdict = json.loads(json.dumps(args.__dict__))
|
||||
file_type = [("KoboldCpp Settings", "*.kcpps")]
|
||||
filename = asksaveasfile(filetypes=file_type, defaultextension=file_type)
|
||||
if filename is None:
|
||||
filename = asksaveasfilename(filetypes=file_type, defaultextension=file_type)
|
||||
if not filename:
|
||||
return
|
||||
filenamestr = str(filename.name).strip()
|
||||
filenamestr = str(filename).strip()
|
||||
filenamestr = f"{filenamestr}.kcpps" if ".kcpps" not in filenamestr.lower() else filenamestr
|
||||
file = open(filenamestr, 'a')
|
||||
file.write(json.dumps(savdict))
|
||||
|
@@ -5282,7 +5374,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
         friendlymodelname = "koboldcpp/" + sanitize_string(newmdldisplayname)
 
     # horde worker settings
-    global maxhordelen, maxhordectx, showdebug, has_multiplayer
+    global maxhordelen, maxhordectx, showdebug, has_multiplayer, savedata_obj
     if args.hordemodelname and args.hordemodelname!="":
         friendlymodelname = args.hordemodelname
     if args.debugmode == 1:
@@ -5303,6 +5395,25 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
     if args.multiplayer:
         has_multiplayer = True
 
+    if args.savedatafile and isinstance(args.savedatafile, str):
+        filepath = args.savedatafile
+        try:
+            with open(filepath, 'r+', encoding='utf-8', errors='ignore') as f:
+                loaded = json.load(f)
+                savedata_obj = loaded
+                print(f"Loaded existing savedatafile at '{filepath}'.")
+        except FileNotFoundError:
+            try:
+                os.makedirs(os.path.dirname(filepath), exist_ok=True)
+                with open(filepath, 'w+', encoding='utf-8', errors='ignore') as f:
+                    savedata_obj = {}
+                    print(f"File '{filepath}' did not exist. Created new savedatafile.")
+                    json.dump(savedata_obj, f)
+            except Exception as e:
+                print(f"Failed to create savedatafile '{filepath}': {e}")
+        except Exception as e:
+            print(f"Failed to access savedatafile '{filepath}': {e}")
+
     if args.highpriority:
         print("Setting process to Higher Priority - Use Caution")
         try:
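Together with the save endpoint above, this means the SaveDataFile (for a server launched with something like --savedatafile saves.jsondb, filename hypothetical) is a single JSON object keyed by slot number, rewritten in full on every remote save. A sketch of reading such a file back, with field values as placeholders inferred from the save handler:

import json

# Hypothetical saves.jsondb contents after one remote save to slot 0:
# {"0": {"title": "My Story", "format": "<frontend format tag>", "data": "<compressed story string>"}}
with open("saves.jsondb", "r", encoding="utf-8") as f:
    savedata = json.load(f)

for slot, entry in sorted(savedata.items()):
    print(f"slot {slot}: {entry['title']} ({len(entry['data'])} characters of story data)")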
@@ -5812,6 +5923,7 @@ if __name__ == '__main__':
     advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
     advparser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')
     advparser.add_argument("--preloadstory", metavar=('[savefile]'), help="Configures a prepared story json save file to be hosted on the server, which frontends (such as KoboldAI Lite) can access over the API.", default="")
+    advparser.add_argument("--savedatafile", metavar=('[savefile]'), help="If enabled, creates or opens a persistent database file on the server, that allows users to save and load their data remotely.", default="")
     advparser.add_argument("--quiet", help="Enable quiet mode, which hides generation inputs and outputs in the terminal. Quiet mode is automatically enabled when running a horde worker.", action='store_true')
     advparser.add_argument("--ssl", help="Allows all content to be served over SSL instead. A valid UNENCRYPTED SSL cert and key .pem files must be provided", metavar=('[cert_pem]', '[key_pem]'), nargs='+')
     advparser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')