From c4df15129838a4f678baafb391e48e04fac2fcb0 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Fri, 23 May 2025 21:33:26 +0800 Subject: [PATCH] experimental swa flag --- expose.h | 1 + gpttype_adapter.cpp | 6 +++++- koboldcpp.py | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/expose.h b/expose.h index 19393433e..43ff75e31 100644 --- a/expose.h +++ b/expose.h @@ -70,6 +70,7 @@ struct load_model_inputs const int quant_k = 0; const int quant_v = 0; const bool check_slowness = false; + const bool swa_support = false; const bool quiet = false; const int debugmode = 0; }; diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 40558682f..33bb645e3 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1927,7 +1927,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in kcpp_data->use_smartcontext = inputs.use_smartcontext; kcpp_data->use_contextshift = inputs.use_contextshift; kcpp_data->use_fastforward = inputs.use_fastforward; - kcpp_data->swa_full = (inputs.use_fastforward || inputs.use_contextshift)?true:false; + kcpp_data->swa_full = !inputs.swa_support;//(inputs.use_fastforward || inputs.use_contextshift)?true:false; + if(!kcpp_data->swa_full) + { + printf("\n!!!!!!!!!!!!!!!!!!!\nExperimental FLAG - SWA SUPPORT IS ENABLED!\n!!!!!!!!!!!!!!!!!!!\n"); + } debugmode = inputs.debugmode; draft_ctx = nullptr; guidance_ctx = nullptr; diff --git a/koboldcpp.py b/koboldcpp.py index ea5f1c027..e3b4adf50 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -192,6 +192,7 @@ class load_model_inputs(ctypes.Structure): ("quant_k", ctypes.c_int), ("quant_v", ctypes.c_int), ("check_slowness", ctypes.c_bool), + ("swa_support", ctypes.c_bool), ("quiet", ctypes.c_bool), ("debugmode", ctypes.c_int)] @@ -1248,6 +1249,7 @@ def load_model(model_filename): inputs.override_kv = args.overridekv.encode("UTF-8") if args.overridekv else "".encode("UTF-8") inputs.override_tensors = args.overridetensors.encode("UTF-8") if args.overridetensors else "".encode("UTF-8") inputs.check_slowness = (not args.highpriority and os.name == 'nt' and 'Intel' in platform.processor()) + inputs.swa_support = args.experiment_swa inputs = set_backend_props(inputs) ret = handle.load_model(inputs) return ret @@ -6907,6 +6909,9 @@ if __name__ == '__main__': admingroup.add_argument("--adminpassword", metavar=('[password]'), help="Require a password to access admin functions. You are strongly advised to use one for publically accessible instances!", default=None) admingroup.add_argument("--admindir", metavar=('[directory]'), help="Specify a directory to look for .kcpps configs in, which can be used to swap models.", default="") + experimentgroup = parser.add_argument_group('Experimental Commands, can change or break any time!') + experimentgroup.add_argument("--experiment_swa", help="Enables SWA mode. There are no safety checks.", action='store_true') + deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!') deprecatedgroup.add_argument("--hordeconfig", help=argparse.SUPPRESS, nargs='+') deprecatedgroup.add_argument("--sdconfig", help=argparse.SUPPRESS, nargs='+')