alias noblas to usecpu

This commit is contained in:
Concedo 2024-09-15 21:25:48 +08:00
parent ab41e324d6
commit a4249abe5d

View file

@@ -329,27 +329,27 @@ def init_library():
use_noavx2 = True
if args.useclblast:
if not file_exists(lib_clblast_noavx2) or (os.name=='nt' and not file_exists("clblast.dll")):
print("Warning: NoAVX2 CLBlast library file not found. Non-BLAS library will be used.")
print("Warning: NoAVX2 CLBlast library file not found. CPU library will be used.")
else:
print("Attempting to use NoAVX2 CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
use_clblast = True
elif (args.usevulkan is not None):
if not file_exists(lib_vulkan_noavx2):
print("Warning: NoAVX2 Vulkan library file not found. Non-BLAS library will be used.")
print("Warning: NoAVX2 Vulkan library file not found. CPU library will be used.")
else:
print("Attempting to use NoAVX2 Vulkan library for faster prompt ingestion. A compatible Vulkan will be required.")
use_vulkan = True
else:
if not file_exists(lib_noavx2):
print("Warning: NoAVX2 library file not found. Failsafe library will be used.")
elif (args.noblas and args.nommap):
elif (args.usecpu and args.nommap):
use_failsafe = True
print("!!! Attempting to use FAILSAFE MODE !!!")
else:
print("Attempting to use non-avx2 compatibility library.")
elif (args.usecublas is not None):
if not file_exists(lib_cublas) and not file_exists(lib_hipblas):
print("Warning: CuBLAS library file not found. Non-BLAS library will be used.")
print("Warning: CuBLAS library file not found. CPU library will be used.")
else:
if file_exists(lib_cublas):
print("Attempting to use CuBLAS library for faster prompt ingestion. A compatible CuBLAS will be required.")
@@ -359,18 +359,18 @@ def init_library():
use_hipblas = True
elif (args.usevulkan is not None):
if not file_exists(lib_vulkan):
print("Warning: Vulkan library file not found. Non-BLAS library will be used.")
print("Warning: Vulkan library file not found. CPU library will be used.")
else:
print("Attempting to use Vulkan library for faster prompt ingestion. A compatible Vulkan will be required.")
use_vulkan = True
elif args.useclblast:
if not file_exists(lib_clblast) or (os.name=='nt' and not file_exists("clblast.dll")):
print("Warning: CLBlast library file not found. Non-BLAS library will be used.")
print("Warning: CLBlast library file not found. CPU library will be used.")
else:
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
use_clblast = True
else:
print("Attempting to use Non-BLAS library.")
print("Attempting to use CPU library.")
if use_noavx2:
if use_failsafe:
@@ -3026,12 +3026,12 @@ def show_gui():
if gpulayers_var.get():
args.gpulayers = int(gpulayers_var.get())
if runopts_var.get()=="Use CPU":
args.noblas = True
args.usecpu = True
if runopts_var.get()=="Use CPU (Old CPU)":
args.noavx2 = True
if runopts_var.get()=="Failsafe Mode (Old CPU)":
args.noavx2 = True
args.noblas = True
args.usecpu = True
args.nommap = True
if tensor_split_str_vars.get()!="":
tssv = tensor_split_str_vars.get()
@@ -3176,13 +3176,13 @@ def show_gui():
gpu_choice_var.set(str(opt+1))
break
elif "noavx2" in dict and "noblas" in dict and dict["noblas"] and dict["noavx2"]:
elif "noavx2" in dict and "usecpu" in dict and dict["usecpu"] and dict["noavx2"]:
if failsafe_option is not None:
runopts_var.set(failsafe_option)
elif "noavx2" in dict and dict["noavx2"]:
if noavx2_option is not None:
runopts_var.set(noavx2_option)
elif "noblas" in dict and dict["noblas"]:
elif "usecpu" in dict and dict["usecpu"]:
if default_option is not None:
runopts_var.set(default_option)
if "gpulayers" in dict and dict["gpulayers"]:
@@ -3561,6 +3561,9 @@ def convert_outdated_args(args):
if len(dict["hordeconfig"]) > 4:
dict["hordekey"] = dict["hordeconfig"][3]
dict["hordeworkername"] = dict["hordeconfig"][4]
if "noblas" in dict and dict["noblas"]:
dict["usecpu"] = True
check_deprecation_warning()
return args
@@ -4010,7 +4013,7 @@ def main(launch_args,start_server=True):
if args.gpulayers:
shouldavoidgpu = False
if args.noblas and sys.platform!="darwin":
if args.usecpu and sys.platform!="darwin":
shouldavoidgpu = True
if args.gpulayers and args.gpulayers>0:
print("WARNING: GPU layers is set, but a GPU backend was not selected! GPU will not be used!")
@@ -4026,7 +4029,7 @@ def main(launch_args,start_server=True):
fetch_gpu_properties(False,True,True)
pass
if args.gpulayers==-1:
if MaxMemory[0] > 0 and (not args.noblas) and (args.usecublas or (args.usevulkan is not None) or args.useclblast or sys.platform=="darwin"):
if MaxMemory[0] > 0 and (not args.usecpu) and (args.usecublas or (args.usevulkan is not None) or args.useclblast or sys.platform=="darwin"):
extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj)
layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize)
print(f"Auto Recommended GPU Layers: {layeramt}")
@@ -4291,7 +4294,7 @@ def main(launch_args,start_server=True):
s_pp = float(benchmaxctx-benchlen)/t_pp
s_gen = float(benchlen)/t_gen
datetimestamp = datetime.now(timezone.utc)
benchflagstr = f"NoAVX2={args.noavx2} Threads={args.threads} HighPriority={args.highpriority} NoBlas={args.noblas} Cublas_Args={args.usecublas} Tensor_Split={args.tensor_split} BlasThreads={args.blasthreads} BlasBatchSize={args.blasbatchsize} FlashAttention={args.flashattention} KvCache={args.quantkv}"
benchflagstr = f"NoAVX2={args.noavx2} Threads={args.threads} HighPriority={args.highpriority} Cublas_Args={args.usecublas} Tensor_Split={args.tensor_split} BlasThreads={args.blasthreads} BlasBatchSize={args.blasbatchsize} FlashAttention={args.flashattention} KvCache={args.quantkv}"
print(f"\nBenchmark Completed - v{KcppVersion} Results:\n======")
print(f"Flags: {benchflagstr}")
print(f"Timestamp: {datetimestamp}")
@@ -4385,7 +4388,7 @@ if __name__ == '__main__':
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq', 'rowsplit'])
compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify GPU Device ID (e.g. --usevulkan 0).", metavar=('[Device ID]'), nargs='*', type=int, default=None)
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
compatgroup.add_argument("--noblas", help="Do not use any GPU acceleration (CPU Only)", action='store_true')
compatgroup.add_argument("--usecpu", help="Do not use any GPU acceleration (CPU Only)", action='store_true')
parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 4096). Supported values are [256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]. IF YOU USE ANYTHING ELSE YOU ARE ON YOUR OWN.",metavar=('[256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]'), type=check_range(int,256,262144), default=4096)
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU. Set to -1 to try autodetect, set to 0 to disable GPU offload.",metavar=('[GPU layers]'), nargs='?', const=1, type=int, default=-1)
parser.add_argument("--tensor_split", help="For CUDA and Vulkan only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
@@ -4450,5 +4453,6 @@ if __name__ == '__main__':
deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!')
deprecatedgroup.add_argument("--hordeconfig", help=argparse.SUPPRESS, nargs='+')
deprecatedgroup.add_argument("--sdconfig", help=argparse.SUPPRESS, nargs='+')
compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
main(parser.parse_args(),start_server=True)