diff --git a/Makefile b/Makefile index 44e148523..70d239b88 100644 --- a/Makefile +++ b/Makefile @@ -75,6 +75,7 @@ FASTCXXFLAGS = $(subst -O3,-Ofast,$(CXXFLAGS)) # these are used on windows, to build some libraries with extra old device compatibility SIMPLECFLAGS = +SIMPLERCFLAGS = FULLCFLAGS = NONECFLAGS = @@ -91,6 +92,7 @@ CUBLAS_OBJS = OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm.o common.o sampling.o OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx2.o common.o sampling.o +OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-aarch64_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx1.o common.o sampling.o OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_failsafe.o common.o sampling.o # OS specific @@ -148,6 +150,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64)) CFLAGS += NONECFLAGS += SIMPLECFLAGS += -mavx -msse3 + SIMPLERCFLAGS += -mavx ifdef LLAMA_NOAVX2 FULLCFLAGS += -msse3 -mavx else @@ -161,6 +164,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64)) CFLAGS += NONECFLAGS += SIMPLECFLAGS += -mavx -msse3 + SIMPLERCFLAGS += -mavx ifdef LLAMA_NOAVX2 FULLCFLAGS += -msse3 -mavx else @@ -462,7 +466,7 @@ ggml_v4_clblast.o: ggml/src/ggml.c ggml/include/ggml.h ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h - $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ + $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@ ggml_v4_vulkan_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h @@ -478,19 +482,23 @@ ggml-cpu_v4_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h ggml-cpu_v4_clblast.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml-cpu_v4_clblast_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h - $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ + $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ #quants ggml-quants.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h $(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@ ggml-quants_noavx2.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h $(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@ +ggml-quants_noavx1.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h + $(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@ ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ ggml-cpu-quants.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h $(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@ ggml-cpu-quants_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h $(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@ +ggml-cpu-quants_noavx1.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h + $(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@ ggml-cpu-quants_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ @@ -499,6 +507,8 @@ ggml-cpu-aarch64.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h g $(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@ ggml-cpu-aarch64_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h $(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@ +ggml-cpu-aarch64_noavx1.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h + $(CXX) $(CXXFLAGS) $(SIMPLERCFLAGS) -c $< -o $@ ggml-cpu-aarch64_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h $(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@ @@ -507,6 +517,8 @@ sgemm.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm $(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@ sgemm_noavx2.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h $(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@ +sgemm_noavx1.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h + $(CXX) $(CXXFLAGS) $(SIMPLERCFLAGS) -c $< -o $@ sgemm_failsafe.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h $(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@ @@ -562,7 +574,7 @@ ggml_v3_clblast.o: otherarch/ggml_v3.c otherarch/ggml_v3.h ggml_v3_cublas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h - $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ + $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ #version 2 libs ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h @@ -576,7 +588,7 @@ ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ + $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ #extreme old version compat ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h @@ -692,7 +704,7 @@ ifdef CLBLAST_BUILD koboldcpp_clblast: ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) $(CLBLAST_BUILD) ifdef NOAVX2_BUILD -koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLE) $(OBJS) +koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLER) $(OBJS) $(CLBLAST_BUILD) else koboldcpp_clblast_noavx2: diff --git a/class.py b/class.py index c29271344..6e1f0d70c 100644 --- a/class.py +++ b/class.py @@ -99,7 +99,7 @@ class model_backend(InferenceModel): "extra_classes": "", 'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1}, {'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4} - ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Old CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}], + ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Older CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}], }) requested_parameters.append({ "uitype": "text", diff --git a/koboldcpp.py b/koboldcpp.py index 1587bb3fa..33b9f6559 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -359,9 +359,9 @@ lib_option_pairs = [ (lib_hipblas, "Use hipBLAS (ROCm)"), (lib_vulkan, "Use Vulkan"), (lib_noavx2, "Use CPU (Old CPU)"), - (lib_clblast_noavx2, "Use CLBlast (Old CPU)"), (lib_vulkan_noavx2, "Use Vulkan (Old CPU)"), - (lib_failsafe, "Failsafe Mode (Old CPU)")] + (lib_clblast_noavx2, "Use CLBlast (Older CPU)"), + (lib_failsafe, "Failsafe Mode (Older CPU)")] default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs) runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)] @@ -3124,7 +3124,7 @@ def show_gui(): nl = '\n' tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "") num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt) - num_backends_built.grid(row=1, column=1, padx=195, pady=0) + num_backends_built.grid(row=1, column=1, padx=205, pady=0) num_backends_built.configure(text_color="#00ff00") def gui_changed_modelfile(*args): @@ -3143,7 +3143,7 @@ def show_gui(): predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())])) max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "") index = runopts_var.get() - gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)") + gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)") layercounter_label.grid(row=6, column=1, padx=75, sticky="W") quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W") if sys.platform=="darwin" and gpulayers_var.get()=="-1": @@ -3174,7 +3174,7 @@ def show_gui(): if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)": quick_gpuname_label.configure(text=VKDevicesNames[s]) gpuname_label.configure(text=VKDevicesNames[s]) - elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)": + elif v == "Use CLBlast" or v == "Use CLBlast (Older CPU)": quick_gpuname_label.configure(text=CLDevicesNames[s]) gpuname_label.configure(text=CLDevicesNames[s]) else: @@ -3231,12 +3231,12 @@ def show_gui(): global runmode_untouched runmode_untouched = False index = runopts_var.get() - if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") - if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)": + if index == "Use CLBlast" or index == "Use CLBlast (Older CPU)": gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") CUDA_gpu_selector_box.grid_remove() @@ -3280,7 +3280,7 @@ def show_gui(): else: quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw") - if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw") gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw") quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw") @@ -3302,7 +3302,7 @@ def show_gui(): # presets selector makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.") - runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=180,variable=runopts_var, state="readonly") + runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=190,variable=runopts_var, state="readonly") runoptbox.grid(row=1, column=1,padx=8, stick="nw") runoptbox.set(runopts[0]) # Set to first available option @@ -3636,9 +3636,9 @@ def show_gui(): args.noavx2 = False if gpu_choice_var.get()!="All": gpuchoiceidx = int(gpu_choice_var.get())-1 - if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)": + if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Older CPU)": args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx] - if runopts_var.get() == "Use CLBlast (Old CPU)": + if runopts_var.get() == "Use CLBlast (Older CPU)": args.noavx2 = True if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)": if gpu_choice_var.get()=="All": @@ -3664,7 +3664,7 @@ def show_gui(): args.usecpu = True if runopts_var.get()=="Use CPU (Old CPU)": args.noavx2 = True - if runopts_var.get()=="Failsafe Mode (Old CPU)": + if runopts_var.get()=="Failsafe Mode (Older CPU)": args.noavx2 = True args.usecpu = True args.nommap = True @@ -4517,6 +4517,9 @@ def main(launch_args,start_server=True): global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath args = launch_args + if (args.version) and len(sys.argv) <= 2: + print(f"{KcppVersion}") # just print version and exit + return if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1): suppress_stdout() @@ -5164,6 +5167,7 @@ if __name__ == '__main__': #more advanced params advparser = parser.add_argument_group('Advanced Commands') + advparser.add_argument("--version", help="Prints version and exits.", action='store_true') advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+') advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512) advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0) diff --git a/version.txt b/version.txt index 84a9fa9b1..c9bb105bf 100644 --- a/version.txt +++ b/version.txt @@ -15,7 +15,7 @@ VSVersionInfo( StringTable( u'040904b0', [ - StringStruct(u'CompanyName', u'Your Company Name'), + StringStruct(u'CompanyName', u'KoboldCpp'), StringStruct(u'FileDescription', u'KoboldCpp'), StringStruct(u'InternalName', u'KoboldCpp'), StringStruct(u'LegalCopyright', u'AGPLv3'), diff --git a/version_template.txt b/version_template.txt index 7bf97509f..117cbe45d 100644 --- a/version_template.txt +++ b/version_template.txt @@ -15,7 +15,7 @@ VSVersionInfo( StringTable( u'040904b0', [ - StringStruct(u'CompanyName', u'Your Company Name'), + StringStruct(u'CompanyName', u'KoboldCpp'), StringStruct(u'FileDescription', u'KoboldCpp'), StringStruct(u'InternalName', u'KoboldCpp'), StringStruct(u'LegalCopyright', u'AGPLv3'),