mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-10 04:00:53 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#   examples/model-conversion/Makefile
#   examples/model-conversion/scripts/causal/convert-model.sh
#   ggml/src/ggml-cann/aclnn_ops.cpp
#   ggml/src/ggml-cann/common.h
#   ggml/src/ggml-cann/ggml-cann.cpp
#   ggml/src/ggml-cuda/CMakeLists.txt
#   scripts/compare-commits.sh
This commit is contained in:
commit
3060dfb99f
28 changed files with 399 additions and 261 deletions
|
|
@ -64,7 +64,7 @@ dry_seq_break_max = 128
|
|||
extra_images_max = 4
|
||||
|
||||
# global vars
|
||||
KcppVersion = "1.98.1"
|
||||
KcppVersion = "1.99"
|
||||
showdebug = True
|
||||
kcpp_instance = None #global running instance
|
||||
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}
|
||||
|
|
@ -4554,7 +4554,7 @@ def show_gui():
|
|||
chatcompletionsadapter_var = ctk.StringVar(value="AutoGuess")
|
||||
moeexperts_var = ctk.StringVar(value=str(-1))
|
||||
moecpu_var = ctk.StringVar(value=str(0))
|
||||
defaultgenamt_var = ctk.StringVar(value=str(640))
|
||||
defaultgenamt_var = ctk.StringVar(value=str(768))
|
||||
nobostoken_var = ctk.IntVar(value=0)
|
||||
override_kv_var = ctk.StringVar(value="")
|
||||
override_tensors_var = ctk.StringVar(value="")
|
||||
|
|
@ -5558,7 +5558,7 @@ def show_gui():
|
|||
args.overridenativecontext = 0
|
||||
args.moeexperts = int(moeexperts_var.get()) if moeexperts_var.get()!="" else -1
|
||||
args.moecpu = int(moecpu_var.get()) if moecpu_var.get()!="" else 0
|
||||
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 640
|
||||
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 768
|
||||
args.nobostoken = (nobostoken_var.get()==1)
|
||||
args.enableguidance = (enableguidance_var.get()==1)
|
||||
args.overridekv = None if override_kv_var.get() == "" else override_kv_var.get()
|
||||
|
|
@ -7625,7 +7625,7 @@ if __name__ == '__main__':
|
|||
advparser.add_argument("--nomodel", help="Allows you to launch the GUI alone, without selecting any model.", action='store_true')
|
||||
advparser.add_argument("--moeexperts", metavar=('[num of experts]'), help="How many experts to use for MoE models (default=follow gguf)", type=int, default=-1)
|
||||
advparser.add_argument("--moecpu", metavar=('[layers affected]'), help="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. If no value is provided, applies to all layers.", nargs='?', const=999, type=int, default=0)
|
||||
advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,8192), default=640)
|
||||
advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,8192), default=768)
|
||||
advparser.add_argument("--nobostoken", help="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.", action='store_true')
|
||||
advparser.add_argument("--enableguidance", help="Enables the use of Classifier-Free-Guidance, which allows the use of negative prompts. Has performance and memory impact.", action='store_true')
|
||||
advparser.add_argument("--maxrequestsize", metavar=('[size in MB]'), help="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.", type=int, default=32)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue