From a4249abe5d6d1543c3a9028f6867b2c5a580801e Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 15 Sep 2024 21:25:48 +0800
Subject: [PATCH] alias noblas to usecpu

---
 koboldcpp.py | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 30f3cfb53..fb2958ac6 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -329,27 +329,27 @@ def init_library():
         use_noavx2 = True
         if args.useclblast:
             if not file_exists(lib_clblast_noavx2) or (os.name=='nt' and not file_exists("clblast.dll")):
-                print("Warning: NoAVX2 CLBlast library file not found. Non-BLAS library will be used.")
+                print("Warning: NoAVX2 CLBlast library file not found. CPU library will be used.")
             else:
                 print("Attempting to use NoAVX2 CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
                 use_clblast = True
         elif (args.usevulkan is not None):
             if not file_exists(lib_vulkan_noavx2):
-                print("Warning: NoAVX2 Vulkan library file not found. Non-BLAS library will be used.")
+                print("Warning: NoAVX2 Vulkan library file not found. CPU library will be used.")
             else:
                 print("Attempting to use NoAVX2 Vulkan library for faster prompt ingestion. A compatible Vulkan will be required.")
                 use_vulkan = True
         else:
             if not file_exists(lib_noavx2):
                 print("Warning: NoAVX2 library file not found. Failsafe library will be used.")
-            elif (args.noblas and args.nommap):
+            elif (args.usecpu and args.nommap):
                 use_failsafe = True
                 print("!!! Attempting to use FAILSAFE MODE !!!")
             else:
                 print("Attempting to use non-avx2 compatibility library.")
     elif (args.usecublas is not None):
         if not file_exists(lib_cublas) and not file_exists(lib_hipblas):
-            print("Warning: CuBLAS library file not found. Non-BLAS library will be used.")
+            print("Warning: CuBLAS library file not found. CPU library will be used.")
         else:
             if file_exists(lib_cublas):
                 print("Attempting to use CuBLAS library for faster prompt ingestion. A compatible CuBLAS will be required.")
@@ -359,18 +359,18 @@ def init_library():
                 use_hipblas = True
     elif (args.usevulkan is not None):
         if not file_exists(lib_vulkan):
-            print("Warning: Vulkan library file not found. Non-BLAS library will be used.")
+            print("Warning: Vulkan library file not found. CPU library will be used.")
         else:
             print("Attempting to use Vulkan library for faster prompt ingestion. A compatible Vulkan will be required.")
             use_vulkan = True
     elif args.useclblast:
         if not file_exists(lib_clblast) or (os.name=='nt' and not file_exists("clblast.dll")):
-            print("Warning: CLBlast library file not found. Non-BLAS library will be used.")
+            print("Warning: CLBlast library file not found. CPU library will be used.")
         else:
             print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
             use_clblast = True
     else:
-        print("Attempting to use Non-BLAS library.")
+        print("Attempting to use CPU library.")
 
     if use_noavx2:
         if use_failsafe:
@@ -3026,12 +3026,12 @@ def show_gui():
         if gpulayers_var.get():
             args.gpulayers = int(gpulayers_var.get())
         if runopts_var.get()=="Use CPU":
-            args.noblas = True
+            args.usecpu = True
         if runopts_var.get()=="Use CPU (Old CPU)":
             args.noavx2 = True
         if runopts_var.get()=="Failsafe Mode (Old CPU)":
             args.noavx2 = True
-            args.noblas = True
+            args.usecpu = True
             args.nommap = True
         if tensor_split_str_vars.get()!="":
             tssv = tensor_split_str_vars.get()
@@ -3176,13 +3176,13 @@ def show_gui():
                         gpu_choice_var.set(str(opt+1))
                         break
 
-        elif "noavx2" in dict and "noblas" in dict and dict["noblas"] and dict["noavx2"]:
+        elif "noavx2" in dict and "usecpu" in dict and dict["usecpu"] and dict["noavx2"]:
             if failsafe_option is not None:
                 runopts_var.set(failsafe_option)
         elif "noavx2" in dict and dict["noavx2"]:
             if noavx2_option is not None:
                 runopts_var.set(noavx2_option)
-        elif "noblas" in dict and dict["noblas"]:
+        elif "usecpu" in dict and dict["usecpu"]:
             if default_option is not None:
                 runopts_var.set(default_option)
         if "gpulayers" in dict and dict["gpulayers"]:
@@ -3561,6 +3561,9 @@ def convert_outdated_args(args):
         if len(dict["hordeconfig"]) > 4:
             dict["hordekey"] = dict["hordeconfig"][3]
             dict["hordeworkername"] = dict["hordeconfig"][4]
+    if "noblas" in dict and dict["noblas"]:
+        dict["usecpu"] = True
+
     check_deprecation_warning()
     return args
 
@@ -4010,7 +4013,7 @@ def main(launch_args,start_server=True):
 
     if args.gpulayers:
         shouldavoidgpu = False
-        if args.noblas and sys.platform!="darwin":
+        if args.usecpu and sys.platform!="darwin":
             shouldavoidgpu = True
             if args.gpulayers and args.gpulayers>0:
                 print("WARNING: GPU layers is set, but a GPU backend was not selected! GPU will not be used!")
@@ -4026,7 +4029,7 @@ def main(launch_args,start_server=True):
             fetch_gpu_properties(False,True,True)
             pass
         if args.gpulayers==-1:
-            if MaxMemory[0] > 0 and (not args.noblas) and (args.usecublas or (args.usevulkan is not None) or args.useclblast or sys.platform=="darwin"):
+            if MaxMemory[0] > 0 and (not args.usecpu) and (args.usecublas or (args.usevulkan is not None) or args.useclblast or sys.platform=="darwin"):
                 extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj)
                 layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize)
                 print(f"Auto Recommended GPU Layers: {layeramt}")
@@ -4291,7 +4294,7 @@ def main(launch_args,start_server=True):
         s_pp = float(benchmaxctx-benchlen)/t_pp
         s_gen = float(benchlen)/t_gen
         datetimestamp = datetime.now(timezone.utc)
-        benchflagstr = f"NoAVX2={args.noavx2} Threads={args.threads} HighPriority={args.highpriority} NoBlas={args.noblas} Cublas_Args={args.usecublas} Tensor_Split={args.tensor_split} BlasThreads={args.blasthreads} BlasBatchSize={args.blasbatchsize} FlashAttention={args.flashattention} KvCache={args.quantkv}"
+        benchflagstr = f"NoAVX2={args.noavx2} Threads={args.threads} HighPriority={args.highpriority} Cublas_Args={args.usecublas} Tensor_Split={args.tensor_split} BlasThreads={args.blasthreads} BlasBatchSize={args.blasbatchsize} FlashAttention={args.flashattention} KvCache={args.quantkv}"
         print(f"\nBenchmark Completed - v{KcppVersion} Results:\n======")
         print(f"Flags: {benchflagstr}")
         print(f"Timestamp: {datetimestamp}")
@@ -4385,7 +4388,7 @@ if __name__ == '__main__':
     compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq', 'rowsplit'])
     compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify GPU Device ID (e.g. --usevulkan 0).", metavar=('[Device ID]'), nargs='*', type=int, default=None)
     compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
-    compatgroup.add_argument("--noblas", help="Do not use any GPU acceleration (CPU Only)", action='store_true')
+    compatgroup.add_argument("--usecpu", help="Do not use any GPU acceleration (CPU Only)", action='store_true')
     parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 4096). Supported values are [256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]. IF YOU USE ANYTHING ELSE YOU ARE ON YOUR OWN.",metavar=('[256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]'), type=check_range(int,256,262144), default=4096)
     parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU. Set to -1 to try autodetect, set to 0 to disable GPU offload.",metavar=('[GPU layers]'), nargs='?', const=1, type=int, default=-1)
     parser.add_argument("--tensor_split", help="For CUDA and Vulkan only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
@@ -4450,5 +4453,6 @@ if __name__ == '__main__':
     deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!')
     deprecatedgroup.add_argument("--hordeconfig", help=argparse.SUPPRESS, nargs='+')
     deprecatedgroup.add_argument("--sdconfig", help=argparse.SUPPRESS, nargs='+')
+    compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
 
     main(parser.parse_args(),start_server=True)
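
For context, the pattern this patch applies: the old flag is kept as a hidden argparse option (help=argparse.SUPPRESS) and folded into the new one inside convert_outdated_args(), so saved configs and command lines that still pass --noblas keep working while --help only advertises --usecpu. A standalone sketch of that aliasing approach (illustrative only, simplified; not the actual koboldcpp code):

import argparse

parser = argparse.ArgumentParser()
# New canonical flag, visible in --help.
parser.add_argument("--usecpu", help="Do not use any GPU acceleration (CPU Only)", action='store_true')
# Deprecated alias, hidden from --help but still parsed.
parser.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')

def convert_outdated_args(args):
    # Fold the deprecated flag into the new one so downstream code
    # only ever needs to check args.usecpu.
    d = vars(args)
    if "noblas" in d and d["noblas"]:
        d["usecpu"] = True
    return args

args = convert_outdated_args(parser.parse_args(["--noblas"]))
print(args.usecpu)  # True

Invoked either way, with --usecpu or the legacy --noblas, a downstream check such as "if args.usecpu" behaves identically.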