alias noblas to usecpu

This commit is contained in:
Concedo 2024-09-15 21:25:48 +08:00
parent ab41e324d6
commit a4249abe5d

View file

@@ -329,27 +329,27 @@ def init_library():
use_noavx2 = True
if args.useclblast:
if not file_exists(lib_clblast_noavx2) or (os.name=='nt' and not file_exists("clblast.dll")):
print("Warning: NoAVX2 CLBlast library file not found. Non-BLAS library will be used.")
print("Warning: NoAVX2 CLBlast library file not found. CPU library will be used.")
else:
print("Attempting to use NoAVX2 CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
use_clblast = True
elif (args.usevulkan is not None):
if not file_exists(lib_vulkan_noavx2):
print("Warning: NoAVX2 Vulkan library file not found. Non-BLAS library will be used.")
print("Warning: NoAVX2 Vulkan library file not found. CPU library will be used.")
else:
print("Attempting to use NoAVX2 Vulkan library for faster prompt ingestion. A compatible Vulkan will be required.")
use_vulkan = True
else:
if not file_exists(lib_noavx2):
print("Warning: NoAVX2 library file not found. Failsafe library will be used.")
elif (args.noblas and args.nommap):
elif (args.usecpu and args.nommap):
use_failsafe = True
print("!!! Attempting to use FAILSAFE MODE !!!")
else:
print("Attempting to use non-avx2 compatibility library.")
elif (args.usecublas is not None):
if not file_exists(lib_cublas) and not file_exists(lib_hipblas):
print("Warning: CuBLAS library file not found. Non-BLAS library will be used.")
print("Warning: CuBLAS library file not found. CPU library will be used.")
else:
if file_exists(lib_cublas):
print("Attempting to use CuBLAS library for faster prompt ingestion. A compatible CuBLAS will be required.")
@@ -359,18 +359,18 @@ def init_library():
use_hipblas = True
elif (args.usevulkan is not None):
if not file_exists(lib_vulkan):
print("Warning: Vulkan library file not found. Non-BLAS library will be used.")
print("Warning: Vulkan library file not found. CPU library will be used.")
else:
print("Attempting to use Vulkan library for faster prompt ingestion. A compatible Vulkan will be required.")
use_vulkan = True
elif args.useclblast:
if not file_exists(lib_clblast) or (os.name=='nt' and not file_exists("clblast.dll")):
print("Warning: CLBlast library file not found. Non-BLAS library will be used.")
print("Warning: CLBlast library file not found. CPU library will be used.")
else:
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
use_clblast = True
else:
print("Attempting to use Non-BLAS library.")
print("Attempting to use CPU library.")
if use_noavx2:
if use_failsafe:
@@ -3026,12 +3026,12 @@ def show_gui():
if gpulayers_var.get():
args.gpulayers = int(gpulayers_var.get())
if runopts_var.get()=="Use CPU":
args.noblas = True
args.usecpu = True
if runopts_var.get()=="Use CPU (Old CPU)":
args.noavx2 = True
if runopts_var.get()=="Failsafe Mode (Old CPU)":
args.noavx2 = True
args.noblas = True
args.usecpu = True
args.nommap = True
if tensor_split_str_vars.get()!="":
tssv = tensor_split_str_vars.get()
@@ -3176,13 +3176,13 @@ def show_gui():
gpu_choice_var.set(str(opt+1))
break
elif "noavx2" in dict and "noblas" in dict and dict["noblas"] and dict["noavx2"]:
elif "noavx2" in dict and "usecpu" in dict and dict["usecpu"] and dict["noavx2"]:
if failsafe_option is not None:
runopts_var.set(failsafe_option)
elif "noavx2" in dict and dict["noavx2"]:
if noavx2_option is not None:
runopts_var.set(noavx2_option)
elif "noblas" in dict and dict["noblas"]:
elif "usecpu" in dict and dict["usecpu"]:
if default_option is not None:
runopts_var.set(default_option)
if "gpulayers" in dict and dict["gpulayers"]:
@@ -3561,6 +3561,9 @@ def convert_outdated_args(args):
if len(dict["hordeconfig"]) > 4:
dict["hordekey"] = dict["hordeconfig"][3]
dict["hordeworkername"] = dict["hordeconfig"][4]
if "noblas" in dict and dict["noblas"]:
dict["usecpu"] = True
check_deprecation_warning()
return args
@@ -4010,7 +4013,7 @@ def main(launch_args,start_server=True):
if args.gpulayers:
shouldavoidgpu = False
if args.noblas and sys.platform!="darwin":
if args.usecpu and sys.platform!="darwin":
shouldavoidgpu = True
if args.gpulayers and args.gpulayers>0:
print("WARNING: GPU layers is set, but a GPU backend was not selected! GPU will not be used!")
@@ -4026,7 +4029,7 @@ def main(launch_args,start_server=True):
fetch_gpu_properties(False,True,True)
pass
if args.gpulayers==-1:
if MaxMemory[0] > 0 and (not args.noblas) and (args.usecublas or (args.usevulkan is not None) or args.useclblast or sys.platform=="darwin"):
if MaxMemory[0] > 0 and (not args.usecpu) and (args.usecublas or (args.usevulkan is not None) or args.useclblast or sys.platform=="darwin"):
extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj)
layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.blasbatchsize)
print(f"Auto Recommended GPU Layers: {layeramt}")
@@ -4291,7 +4294,7 @@ def main(launch_args,start_server=True):
s_pp = float(benchmaxctx-benchlen)/t_pp
s_gen = float(benchlen)/t_gen
datetimestamp = datetime.now(timezone.utc)
benchflagstr = f"NoAVX2={args.noavx2} Threads={args.threads} HighPriority={args.highpriority} NoBlas={args.noblas} Cublas_Args={args.usecublas} Tensor_Split={args.tensor_split} BlasThreads={args.blasthreads} BlasBatchSize={args.blasbatchsize} FlashAttention={args.flashattention} KvCache={args.quantkv}"
benchflagstr = f"NoAVX2={args.noavx2} Threads={args.threads} HighPriority={args.highpriority} Cublas_Args={args.usecublas} Tensor_Split={args.tensor_split} BlasThreads={args.blasthreads} BlasBatchSize={args.blasbatchsize} FlashAttention={args.flashattention} KvCache={args.quantkv}"
print(f"\nBenchmark Completed - v{KcppVersion} Results:\n======")
print(f"Flags: {benchflagstr}")
print(f"Timestamp: {datetimestamp}")
@@ -4385,7 +4388,7 @@ if __name__ == '__main__':
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs. For hipBLAS binaries, please check YellowRoseCx rocm fork.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq] [rowsplit]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq', 'rowsplit'])
compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify GPU Device ID (e.g. --usevulkan 0).", metavar=('[Device ID]'), nargs='*', type=int, default=None)
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
compatgroup.add_argument("--noblas", help="Do not use any GPU acceleration (CPU Only)", action='store_true')
compatgroup.add_argument("--usecpu", help="Do not use any GPU acceleration (CPU Only)", action='store_true')
parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 4096). Supported values are [256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]. IF YOU USE ANYTHING ELSE YOU ARE ON YOUR OWN.",metavar=('[256,512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768,49152,65536,98304,131072]'), type=check_range(int,256,262144), default=4096)
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU. Set to -1 to try autodetect, set to 0 to disable GPU offload.",metavar=('[GPU layers]'), nargs='?', const=1, type=int, default=-1)
parser.add_argument("--tensor_split", help="For CUDA and Vulkan only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
@@ -4450,5 +4453,6 @@ if __name__ == '__main__':
deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!')
deprecatedgroup.add_argument("--hordeconfig", help=argparse.SUPPRESS, nargs='+')
deprecatedgroup.add_argument("--sdconfig", help=argparse.SUPPRESS, nargs='+')
compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
main(parser.parse_args(),start_server=True)