Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	examples/model-conversion/Makefile
#	examples/model-conversion/scripts/causal/convert-model.sh
#	ggml/src/ggml-cann/aclnn_ops.cpp
#	ggml/src/ggml-cann/common.h
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-cuda/CMakeLists.txt
#	scripts/compare-commits.sh
This commit is contained in:
Concedo 2025-08-28 23:17:29 +08:00
commit 3060dfb99f
28 changed files with 399 additions and 261 deletions

View file

@ -64,7 +64,7 @@ dry_seq_break_max = 128
extra_images_max = 4
# global vars
KcppVersion = "1.98.1"
KcppVersion = "1.99"
showdebug = True
kcpp_instance = None #global running instance
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}
@ -4554,7 +4554,7 @@ def show_gui():
chatcompletionsadapter_var = ctk.StringVar(value="AutoGuess")
moeexperts_var = ctk.StringVar(value=str(-1))
moecpu_var = ctk.StringVar(value=str(0))
defaultgenamt_var = ctk.StringVar(value=str(640))
defaultgenamt_var = ctk.StringVar(value=str(768))
nobostoken_var = ctk.IntVar(value=0)
override_kv_var = ctk.StringVar(value="")
override_tensors_var = ctk.StringVar(value="")
@ -5558,7 +5558,7 @@ def show_gui():
args.overridenativecontext = 0
args.moeexperts = int(moeexperts_var.get()) if moeexperts_var.get()!="" else -1
args.moecpu = int(moecpu_var.get()) if moecpu_var.get()!="" else 0
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 640
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 768
args.nobostoken = (nobostoken_var.get()==1)
args.enableguidance = (enableguidance_var.get()==1)
args.overridekv = None if override_kv_var.get() == "" else override_kv_var.get()
@ -7625,7 +7625,7 @@ if __name__ == '__main__':
advparser.add_argument("--nomodel", help="Allows you to launch the GUI alone, without selecting any model.", action='store_true')
advparser.add_argument("--moeexperts", metavar=('[num of experts]'), help="How many experts to use for MoE models (default=follow gguf)", type=int, default=-1)
advparser.add_argument("--moecpu", metavar=('[layers affected]'), help="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. If no value is provided, applies to all layers.", nargs='?', const=999, type=int, default=0)
advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,8192), default=640)
advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,8192), default=768)
advparser.add_argument("--nobostoken", help="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.", action='store_true')
advparser.add_argument("--enableguidance", help="Enables the use of Classifier-Free-Guidance, which allows the use of negative prompts. Has performance and memory impact.", action='store_true')
advparser.add_argument("--maxrequestsize", metavar=('[size in MB]'), help="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.", type=int, default=32)