Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 17:14:36 +00:00)
Commit 7d8aa31f1f (parent 8780b33c64)
fixed embeddings, added new parameter to limit max embeddings context
6 changed files with 360 additions and 9 deletions
koboldcpp.py (13 changes)

@@ -56,7 +56,7 @@ logit_bias_max = 512
 dry_seq_break_max = 128
 
 # global vars
-KcppVersion = "1.93.1"
+KcppVersion = "1.93.2"
 showdebug = True
 kcpp_instance = None #global running instance
 global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False}
@@ -350,6 +350,7 @@ class embeddings_load_model_inputs(ctypes.Structure):
                 ("gpulayers", ctypes.c_int),
                 ("flash_attention", ctypes.c_bool),
                 ("use_mmap", ctypes.c_bool),
+                ("embeddingsmaxctx", ctypes.c_int),
                 ("quiet", ctypes.c_bool),
                 ("debugmode", ctypes.c_int)]
 
@@ -1742,6 +1743,7 @@ def embeddings_load_model(model_filename):
     inputs.flash_attention = False
     inputs.threads = args.threads
     inputs.use_mmap = args.usemmap
+    inputs.embeddingsmaxctx = args.embeddingsmaxctx
     inputs = set_backend_props(inputs)
     ret = handle.embeddings_load_model(inputs)
     return ret
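
Taken together, the two hunks above carry the new setting across the Python/native boundary: a ctypes.c_int field is added to the input struct (position matters, since _fields_ must mirror the native struct's member order), and the loader copies args.embeddingsmaxctx into it before the native call. A minimal sketch of that flow, with unrelated fields and plumbing elided (load_embeddings and its arguments are illustrative; the native side lives in the other changed files, which this page does not show):

import ctypes

class embeddings_load_model_inputs(ctypes.Structure):
    # abbreviated: the new field plus its immediate neighbors from the hunk above
    _fields_ = [("use_mmap", ctypes.c_bool),
                ("embeddingsmaxctx", ctypes.c_int),  # 0 = keep the model's trained context
                ("quiet", ctypes.c_bool)]

def load_embeddings(handle, args):
    # condensed view of the embeddings_load_model() path after this commit
    inputs = embeddings_load_model_inputs()
    inputs.use_mmap = args.usemmap
    inputs.embeddingsmaxctx = args.embeddingsmaxctx  # new: optional context cap
    return handle.embeddings_load_model(inputs)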
@@ -4245,6 +4247,7 @@ def show_gui():
     ttsmaxlen_var = ctk.StringVar(value=str(default_ttsmaxlen))
 
     embeddings_model_var = ctk.StringVar()
+    embeddings_ctx_var = ctk.StringVar(value=str(""))
 
     admin_var = ctk.IntVar(value=0)
     admin_dir_var = ctk.StringVar()
@@ -4852,7 +4855,8 @@ def show_gui():
     makelabelentry(model_tab, "Draft Amount: ", draftamount_var, 13, 50,padx=100,singleline=True,tooltip="How many tokens to draft per chunk before verifying results")
     makelabelentry(model_tab, "Splits: ", draftgpusplit_str_vars, 13, 50,padx=210,singleline=True,tooltip="Distribution of draft model layers. Leave blank to follow main model's gpu split. Only works if multi-gpu (All) selected in main model.", labelpadx=160)
     makelabelentry(model_tab, "Layers: ", draftgpulayers_var, 13, 50,padx=320,singleline=True,tooltip="How many layers to GPU offload for the draft model", labelpadx=270)
-    makefileentry(model_tab, "Embeds Model:", "Select Embeddings Model File", embeddings_model_var, 15, width=280,singlerow=True, filetypes=[("*.gguf","*.gguf")], tooltiptxt="Select an embeddings GGUF model that can be used to generate embedding vectors.")
+    makefileentry(model_tab, "Embeds Model:", "Select Embeddings Model File", embeddings_model_var, 15, width=160,singlerow=True, filetypes=[("*.gguf","*.gguf")], tooltiptxt="Select an embeddings GGUF model that can be used to generate embedding vectors.")
+    makelabelentry(model_tab, "EmbdCtx: ", embeddings_ctx_var, 15, 50,padx=390,singleline=True,tooltip="If set above 0, limits max context for embedding model to save memory.", labelpadx=330)
     makefileentry(model_tab, "Preload Story:", "Select Preloaded Story File", preloadstory_var, 17,width=280,singlerow=True,tooltiptxt="Select an optional KoboldAI JSON savefile \nto be served on launch to any client.")
     makefileentry(model_tab, "SaveData File:", "Select or Create New SaveData Database File", savedatafile_var, 19,width=280,filetypes=[("KoboldCpp SaveDB", "*.jsondb")],singlerow=True,dialog_type=1,tooltiptxt="Selecting a file will allow data to be loaded and saved persistently to this KoboldCpp server remotely. File is created if it does not exist.")
     makefileentry(model_tab, "ChatCompletions Adapter:", "Select ChatCompletions Adapter File", chatcompletionsadapter_var, 24, width=250, filetypes=[("JSON Adapter", "*.json")], tooltiptxt="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.")
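
(Note that the existing Embeds Model picker on row 15 narrows from width=280 to width=160 so that the new EmbdCtx entry can share the same row beside it.)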
@@ -5207,6 +5211,9 @@ def show_gui():
         if embeddings_model_var.get() != "":
             args.embeddingsmodel = embeddings_model_var.get()
+
+        if embeddings_ctx_var.get() != "":
+            args.embeddingsmaxctx = (0 if embeddings_ctx_var.get()=="" else int(embeddings_ctx_var.get()))
 
         if tts_model_var.get() != "" and wavtokenizer_var.get() != "":
             args.ttsthreads = (0 if tts_threads_var.get()=="" else int(tts_threads_var.get()))
             args.ttsmodel = tts_model_var.get()
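
One detail worth noting in the hunk above: the assignment is already guarded by embeddings_ctx_var.get() != "", so the 0-arm of the copied ternary can never be taken. A hypothetical equivalent without the dead branch:

s = embeddings_ctx_var.get()
if s != "":
    args.embeddingsmaxctx = int(s)  # the empty-string arm of the original ternary is unreachable inside this guard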
@@ -5401,6 +5408,7 @@ def show_gui():
         ttsmaxlen_var.set(str(dict["ttsmaxlen"]) if ("ttsmaxlen" in dict and dict["ttsmaxlen"]) else str(default_ttsmaxlen))
 
         embeddings_model_var.set(dict["embeddingsmodel"] if ("embeddingsmodel" in dict and dict["embeddingsmodel"]) else "")
+        embeddings_ctx_var.set(str(dict["embeddingsmaxctx"]) if ("embeddingsmaxctx" in dict and dict["embeddingsmaxctx"]) else "")
 
         admin_var.set(dict["admin"] if ("admin" in dict) else 0)
         admin_dir_var.set(dict["admindir"] if ("admindir" in dict and dict["admindir"]) else "")
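
The import path above mirrors the export path: a saved config that contains an embeddingsmaxctx key repopulates the EmbdCtx box on load. As a rough illustration (KoboldCpp saved settings are a JSON dict keyed by argument name; the values here are made up):

{"embeddingsmodel": "embedder.gguf", "embeddingsmaxctx": 1024}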
@@ -7139,6 +7147,7 @@ if __name__ == '__main__':
 
     embeddingsparsergroup = parser.add_argument_group('Embeddings Model Commands')
     embeddingsparsergroup.add_argument("--embeddingsmodel", metavar=('[filename]'), help="Specify an embeddings model to be loaded for generating embedding vectors.", default="")
+    embeddingsparsergroup.add_argument("--embeddingsmaxctx", metavar=('[amount]'), help="Overrides the default maximum supported context of an embeddings model (defaults to trained context).", type=int, default=0)
 
     admingroup = parser.add_argument_group('Administration Commands')
     admingroup.add_argument("--admin", help="Enables admin mode, allowing you to unload and reload different configurations or models.", action='store_true')
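
With the argument group extended, the new limit can also be exercised directly from the command line; for example (the model filename is a placeholder):

python koboldcpp.py --embeddingsmodel embedder.gguf --embeddingsmaxctx 512

Leaving --embeddingsmaxctx at its default of 0 keeps the embedding model's full trained context, per the help text above.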